#!/usr/bin/env python

from pyPheWAS.pyPhewasCore import *
import pandas as pd
import sys, os

# Command-line flags accepted by this script, each mapped to the kwargs key
# that the rest of the script reads the value from.
optargs = {
    '--phenotypefiles': 'phenotypes',
    '--groupfiles': 'groups',
    '--path': 'path',
    '--phenotypeout': 'phenout',
    '--groupout': 'groupout',
}

"""
Retrieve and validate all arguments.
"""

# Everything after the script name is handed to the argument processor.
args = sys.argv[1:]

# Seed the argument dict with the defaults (the current directory for
# 'path') and let process_args combine it with optargs and the raw arguments.
kwargs = process_args({'path': '.'}, optargs, *args)

# Normalise 'path' to an absolute directory; joining with '' appends a
# trailing separator.
absolute_dir = os.path.abspath(kwargs['path'])
kwargs['path'] = os.path.join(absolute_dir, '')

print(kwargs)


# Validate the file arguments with explicit raises rather than `assert`,
# so the checks still run when Python is started with -O.
def _require_csv(names, description):
    """Raise ValueError unless every name in *names* ends with '.csv'.

    names       -- iterable of file-name strings to check
    description -- noun used in the error message (e.g. "phenotype file")
    """
    for name in names:
        if not name.endswith('.csv'):
            raise ValueError("%s is not a valid %s, must be a .csv file" % (name, description))


# Input arguments may list several files separated by '+'; check each one
# individually instead of only the end of the combined string.
_require_csv(kwargs['phenotypes'].split('+'), 'phenotype file')
_require_csv(kwargs['groups'].split('+'), 'group file')

# Each output argument names a single file.
_require_csv([kwargs['phenout']], 'output file')
_require_csv([kwargs['groupout']], 'output file')

# Show the resolved arguments to the user.
display_kwargs(kwargs)

# Publish every argument as a variable of the same name. With no argument
# vars() acts like locals(), which at module level is the module namespace,
# so the update really does create the names.
vars().update(kwargs)

def _in_workdir(name):
    """Return the absolute path of *name* inside the directory kwargs['path']."""
    return os.path.abspath(os.path.join(kwargs['path'], name))


def _read_csvs(file_spec):
    """Read each '+'-separated CSV named in *file_spec* and return the DataFrames as a list."""
    return [pd.read_csv(_in_workdir(name)) for name in file_spec.split('+')]


# Arguments are read from kwargs directly so this section does not rely on
# names having been injected into the module namespace.
icd9sDF = _read_csvs(kwargs['phenotypes'])
groupsDF = _read_csvs(kwargs['groups'])

# Stack the group tables and keep only the first row seen for each 'id'.
ng = pd.concat(groupsDF)
ng.drop_duplicates(subset='id', inplace=True)

# Stack the phenotype tables and drop rows that are identical in every column.
ni = pd.concat(icd9sDF)
ni.drop_duplicates(inplace=True)

# Write to the requested output names. The previous code built these paths
# from the list-comprehension variable `element`, which is undefined here on
# Python 3 and on Python 2 pointed both writes at the last group input file.
# index=False: the index is only the per-file row number left over from
# read_csv/concat, so writing it would add a meaningless unnamed column.
ni.to_csv(_in_workdir(kwargs['phenout']), index=False)
ng.to_csv(_in_workdir(kwargs['groupout']), index=False)
