#!/usr/bin/env python3

# ############################### #
# featureCSVfromFASTA
#
# Creates a feature table with class identifications from multiple FASTA files.
#
# Anna Farrell-Sherman 3/31/19
# ############################### #

#import packages
import argparse # to format command line arguments
import sys #for command line parsing
import os #for creating file names

#import the Woolf module for creating feature tables
from woolf import featureTable

#create parser for command line
parser = argparse.ArgumentParser(
	description="Build a CSV feature table from amino acid FASTA files"
)

#Output file info (user specified output)
parser.add_argument(
	"-c", "--comparisonFileName",
	default="featureTable",
	help="an identifying tag for all output files",
)
parser.add_argument(
	"-f", "--folder",
	default="",
	help="A folder to contain the output files",
)

#choose which kind of feature table to build (the two flags cannot be combined)
group = parser.add_mutually_exclusive_group()
group.add_argument(
	"-b", "--binary",
	action="store_true",
	help="build a binary feature table from the --posFasta and --negFasta files",
)
group.add_argument(
	"-t", "--predict",
	action="store_true",
	help="build a prediction feature table from the --unknownFasta files",
)

#add arguments for positive, negative and unknown class fasta files
parser.add_argument(
	"-p", "--posFasta",
	nargs='+',
	help="one or more FASTA files containing amino acid sequences belonging to the positive class",
)
parser.add_argument(
	"-n", "--negFasta",
	nargs='+',
	help="one or more FASTA files containing amino acid sequences belonging to the negative class",
)
parser.add_argument(
	"-u", "--unknownFasta",
	nargs='+',
	help="one or more FASTA files containing amino acid sequences of unknown function",
)


#parse the command line; the resulting namespace is used by the rest of the script
args = parser.parse_args()

#called with no arguments at all: show the usage text on stderr and stop with status 1
if len(sys.argv) == 1:
	parser.print_help(file=sys.stderr)
	raise SystemExit(1)

#Work out where the CSV is written, creating the output folder when one was requested
if args.folder:
	# os.path.join() (used below) takes care of the slashes, (also lets it work on windows).
	folderName = args.folder
	# makedirs also creates missing parent folders (e.g. "out/run1"), and
	# exist_ok=True makes an already existing folder a no-op instead of an error.
	os.makedirs(folderName, exist_ok=True)
	fileName = os.path.join(folderName, args.comparisonFileName + '.csv')
else:
	#no folder given: the file name is used as-is, relative to the current directory
	fileName = args.comparisonFileName + '.csv'

#Build and save the requested feature table.
#Usage errors are reported on stderr and end the script with exit status 1,
#matching the no-argument check earlier in the script.

#Creating a binary feature table
if args.binary:
	if args.unknownFasta:
		print("Ignoring inputs to --unknownFasta to create a binary feature table: " + str(args.unknownFasta))
	if not (args.posFasta and args.negFasta):
		#both classes are needed for a binary table
		print("For a binary feature table please provide both positive and negative class fasta files with [-p] and [-n]", file=sys.stderr)
		sys.exit(1)
	seqFeatureTable = featureTable.binaryFeatureTable(args.posFasta, args.negFasta)
	featureTable.saveCSV(seqFeatureTable, fileName)
	print('Binary Table')
	print('Saved csv file to: ' + fileName)
#creating a prediction feature table
elif args.predict:
	if args.posFasta or args.negFasta:
		print("Ignoring inputs to --posFasta and --negFasta to create a prediction feature table")
	if not args.unknownFasta:
		#nothing to build the prediction table from
		print("For a prediction feature table please provide a fasta file with [-u]", file=sys.stderr)
		sys.exit(1)
	print(args.unknownFasta)
	seqFeatureTable = featureTable.predictDataFeatureTable(args.unknownFasta)
	featureTable.saveCSV(seqFeatureTable, fileName)
	print('Prediction Table')
	print('Saved csv file to: ' + fileName)
#Error message and printing help if no feature table type is passed
else:
	print('Error: Please select either binary or predict as described below:\n', file=sys.stderr)
	parser.print_help(sys.stderr)
	sys.exit(1)
