
# Load the workflow settings from config.yaml into the global `config`
# mapping. Rule fastqc reads config['fastqc']['kargs'] from it.
configfile: "config.yaml"

import sequana.snaketools as sm


# TODO: the two glob patterns below are hard-coded; they should be read from
# the config file instead.
# Each factory collects the files matching its pattern. Rule fastqc uses the
# `dataset` attribute to build its input paths and `dataset_noexts` to build
# its output paths (presumably file names with and without their extensions --
# confirm against sequana.snaketools.FileFactory).
_fastqc = sm.FileFactory("fastq_raw/*gz")
_fastqc_bwa_fix = sm.FileFactory("bwa_fix/*gz")


rule fastqc:
    """Run FastQC on every file found in fastq_raw/ and bwa_fix/.

    One HTML report per input file is expected in the fastqc/ directory, plus
    the fastqc/fastqc.done flag file that is touched once the rule succeeds.

    Input files that contain no reads are not passed to FastQC; a placeholder
    report is written in their place so that the declared output still exists.
    Extra command-line options for FastQC are taken from
    config['fastqc']['kargs'].
    """
    # we could have a dynamic search or based on config file
    input:
        expand("fastq_raw/{dataset}", dataset=_fastqc.dataset),
        expand("bwa_fix/{dataset}", dataset=_fastqc_bwa_fix.dataset),
    output:
        expand("fastqc/{dataset}_fastqc.html", dataset=_fastqc.dataset_noexts),
        expand("fastqc/{dataset}_fastqc.html", dataset=_fastqc_bwa_fix.dataset_noexts),
        touch("fastqc/fastqc.done")
    params:
        wkdir="fastqc",
        kargs = config['fastqc']['kargs']
    threads: 6
    log:
        "fastqc.log",
    run:
        from sequana import FastQ

        # Inputs and HTML outputs are expanded in the same order, so input[i]
        # corresponds to output[i] (the touch() flag comes last in output).
        newinput = []
        for i, this in enumerate(input):
            if len(FastQ(this)) != 0:
                newinput.append(this)
            else:
                # An empty input would make the rule fail (no report would be
                # produced for it), so write a placeholder report that names
                # the offending input file.
                with open(output[i], "w") as fh:
                    fh.write("No data in %s" % this)

        # Only call FastQC when at least one input has data: running it
        # without any file argument does not produce reports.
        if newinput:
            # NOTE(review): stdout is redirected to {params.wkdir}/{log}
            # (fastqc/fastqc.log) whereas the declared log is fastqc.log --
            # confirm which location is intended.
            shell("fastqc -t {threads} --outdir {params.wkdir} -f fastq {newinput} {params.kargs} > {params.wkdir}/{log}")


rule fastqc_output:
    """Target rule: request the flag file touched by rule fastqc."""
    # Rule fastqc touches "fastqc/fastqc.done"; the path requested here must
    # match it exactly, otherwise no rule can produce this target.
    input: "fastqc/fastqc.done"


rule fastqc_cleanup:
    """Remove the contents of the fastqc directory

    Every entry matched by <wkdir>/* is deleted (names starting with a dot
    are not matched by the glob); the directory itself is kept.
    """
    params:
        wkdir = "fastqc",
    run:
        # Import everything used here explicitly rather than relying on names
        # that happen to be available in the Snakefile namespace.
        import glob
        import os
        import shutil

        for filename in glob.glob(params.wkdir + '/*'):
            print('removing %s' % filename)
            # os.remove() raises on directories; remove those recursively.
            # Symlinks (even to directories) are unlinked, not followed.
            if os.path.isdir(filename) and not os.path.islink(filename):
                shutil.rmtree(filename)
            else:
                os.remove(filename)



