"""
Author:
Affiliation:
Aim:
Data:
Run: snakemake -s Snakefile
Changelog:
"""
import sequana
from sequana import snakemake as sm

# Change parameters inside config.json
configfile: "config.json"


workdir: "."
sm.message("Sequana pipeline biomics")
sm.message("The current working directory is " )

############# NOTHING to change here below

include: sm.rules['dag']

vc = sm.ValidateConfig(config)
config = vc()

get_prefixes = lambda filename: filename.split(".")[0]

if "cutadapt_params" in config.keys():
    config['cutadapt_params'] = " ".join([x + " " + y
        for x,y in config.cutadapt_params.items()])

DATA = [get_prefixes(config.R1), get_prefixes(config.R2)]

sm.message("Using %s" % DATA)

tags = {'sample':""}
if config['sample'] is not False:
    tags['sample'] = ".sample"


rule all:
    """

    """
    input: 
        # copy a subset of the raw data in input/
        #expand("input/{dataset}%(sample)s.fastq.gz" % tags, dataset=DATA),
        # remove adapters
        #expand("fastq_cutadapt/{dataset}.cutadapt%(sample)s.fastq.gz" % tags,
        #    dataset=DATA),
        # perform a quality control after adapter removal
        expand("fastqc_results/{dataset}.cutadapt%(sample)s_fastqc.zip" % tags,
            dataset=DATA),
        # a report
        "bwa_contaminant/contaminant_R1.mapped.fastq",
        "bwa_contaminant/contaminant.sam",
        "dag.svg",
        "report/index.html"
    version: sequana.version


rule sampling:
    input:  "fastq_raw/{dataset}.fastq.gz"
    output: "input/{dataset}%(sample)s.fastq.gz" % tags
    message: """
    -- Extracting only a subset of the raw data (%s reads)
    """ % config.sample
    run:
        if config['sample'] == -1:
            # symbolic link use force to avoid error if it exists already
            #shell("ln -f -s ../{input} {output}")
            shell("cp -f {input} {output}")
            shell("chmod 660 {output}")
        else:
            shell("fastq_head {input} {config.sample} {output}")


rule cutadapt:
    message: "Filtering reads for adapters and bad quality"
    input: expand("input/{dataset}%(sample)s.fastq.gz" % tags, dataset=DATA)
    output:
        R1 = "fastq_cutadapt/" + DATA[0] + ".cutadapt%(sample)s.fastq.gz" % tags,
        R2 = "fastq_cutadapt/" + DATA[1] + ".cutadapt%(sample)s.fastq.gz" % tags,
        summary = "fastq_cutadapt/cutadapt.log" 
    params: config.cutadapt_params
    shell: "cutadapt -o {output.R1} -p {output.R2} {input} {params} > {output.summary}"


rule fastqc:
    input: "fastq_cutadapt/{dataset}.cutadapt%(sample)s.fastq.gz" % tags
    output: "fastqc_results/{dataset}.cutadapt%(sample)s_fastqc.zip" % tags
    params: dir="fastqc_results"
    log: "fastqc_{dataset}.log"
    threads: 2
    shell:  "fastqc -t {threads} --outdir {params.dir} -f fastq {input} > {params.dir}/{log}"


rule bwa_fix:
    message: """
    -- Running bwa index and bwa mem
    -- information saved in {log}
    """
    input:
        contaminant = config.contaminant,
        R1 = "fastq_cutadapt/" + DATA[0] + ".cutadapt%(sample)s.fastq.gz" % tags,
        R2 = "fastq_cutadapt/" + DATA[1] + ".cutadapt%(sample)s.fastq.gz" % tags
    params:
        wkdir = "bwa_contaminant",
    output:
        bam = "bwa_contaminant/contaminant.bam",
        sam = "bwa_contaminant/contaminant.sam",
    log: "logs/bwa_fix.log"
    threads: 1
    shell:
        """
        # a local copy of contaminant from input to bwa_contaminant
        cp {config.contaminant} {params.wkdir}

        # Create index
        bwa index {params.wkdir}/{config.contaminant}

        # Run bwa
        bwa mem -t {threads} -M {params.wkdir}/{input.contaminant} {input.R1} {input.R2} > {output.sam}

        # Run samtools
        # -b output BAM
        # -T reference
        samtools view -bT {params.wkdir}/{input.contaminant} {output.sam} -o {output.bam}
        """


rule bwa_bam_to_fastq:
    message: """
    -- Extracting the fastq from the BAM/SAM files
    -- information saved in {log}
    """
    input:
        bam = "bwa_contaminant/contaminant.bam",
        sam = "bwa_contaminant/contaminant.sam",
        R1 = "fastq_cutadapt/" + DATA[0] + ".cutadapt%(sample)s.fastq.gz" % tags,
        R2 = "fastq_cutadapt/" + DATA[1] + ".cutadapt%(sample)s.fastq.gz" % tags
    params:
        wkdir = "bwa_contaminant",
    output:
        mapped = "bwa_contaminant/contaminant.mapped.sam",
        unmapped = "bwa_contaminant/contaminant.unmapped.sam",
        fastqR1 = "bwa_contaminant/R1.mapped.fastq",
        fastqR2 = "bwa_contaminant/R2.mapped.fastq"
    threads: 1
    run:
        from sequana.tools import bam_to_mapped_unmapped_fastq as bam2fastq
        stats = bam2fastq(input["bam"])


rule report:
    input:
        cutadapt1 = "fastq_cutadapt/cutadapt.log",
        dag = "dag.svg"
    output: "report/index.html"
    run:
        from sequana import report_cutadapt
        from sequana import report_main

        s = report_main.SequanaReport()
        s.create_report()

        s = report_cutadapt.CutAdaptReport("fastq_cutadapt/cutadapt.log")
        s.create_report()
        shell("cp Snakefile report/")
        shell("cp dag.svg report/")


#  "| samtools view -Sbh -o {snakemake.output[0]} -) 2> {snakemake.log}")


rule clean:
    shell:
        """
        rm -rf fastqc_results
        rm -rf fastq_sample
        rm -rf fastq_cutadapt
        rm -rf input/*fastq*
        rm -rf bwa_contaminant
        rm -f dag.svg dat.dot
        """


onsuccess:
    print("Workflow finished.")
    #shell("mail -s \"Snakemake finished\" thomas.cokelaer@pasteur.fr < {log}")

"""
onerror:
    print("An error occured")
    shell("mail -s \"an error occured\" thomas.cokelaer@pasteur.fr < {log}")
"""
