#!/usr/bin/env python
# coding=utf-8
import argparse


def main():
    """Run the chinese language tools command line interface.

    Parses ``sys.argv`` and dispatches on the positional ``action``:

    * ``extract``  -- hand the input file to ``chat_util.process_qq_history``
      (``qq`` is the only supported ``--source-type``).
    * ``cut``      -- hand the input file to ``file_util.cut_words_in``.
    * ``learn``    -- train a ``VectorModel`` on the input corpus (updating
      the model given by ``--stored-model`` when present) and save it to
      ``--output-file``, else to the stored model path, else ``model.dat``.
    * ``word2vec`` -- print ``VectorModel.to_vector`` for the input word.
    * ``similar``  -- print one ``word<TAB>score`` line per entry yielded by
      ``similar_by_word`` for the input word.

    The code uses single-argument ``print(...)`` calls so that it runs
    unchanged on both Python 2 and Python 3.

    Returns:
        int: 0 when the action completed, 1 when a required argument is
        missing or the action / source type is not supported.  The
        diagnostic message is printed to standard output in either case.
    """
    parser = argparse.ArgumentParser(description='chinese language tools command line.')
    parser.add_argument("action", help="action to perform, including extract, cut, learn, word2vec, similar")
    parser.add_argument("input", nargs="?", help="input for the action")
    parser.add_argument("-o", "--output-file", help="file to save output")
    parser.add_argument("-m", "--stored-model", help="saved word2vec model")
    parser.add_argument("-e", "--encoding", default="utf-8", help="specify file encoding")
    parser.add_argument("-t", "--source-type", default="qq", help="source type of the input, supported types are qq")
    args = parser.parse_args()
    action = args.action

    # NOTE: the project modules are imported only after the arguments have
    # been validated, so a usage mistake is reported without first loading
    # (or failing to load) the processing libraries.

    if action == "extract":
        if not args.input:
            print("Source file is required!")
            return 1

        print("Prepare extracting sentences from " + args.input)
        if args.source_type != "qq":
            print("Unsupported type: " + args.source_type)
            return 1

        from cla.util import chat_util
        chat_util.process_qq_history(args.input, encoding=args.encoding, output_path=args.output_file)
        print("Done.")
        return 0

    if action == "cut":
        if not args.input:
            print("Sentence input is required!")
            return 1

        from cla.util import file_util
        print("Cutting sentences in " + args.input)
        file_util.cut_words_in(args.input, encoding=args.encoding, output_path=args.output_file)
        print("Done.")
        return 0

    if action == "learn":
        if not args.input:
            print("Corpus input is required!")
            return 1

        from cla.learn.word2vec import VectorModel
        if args.stored_model:
            model = VectorModel(source_file_path=args.stored_model)
            print("Loaded previous model: " + args.stored_model)
        else:
            model = VectorModel()

        print("Learning vector presentation of " + args.input)
        # Training updates an existing model only when one was loaded.
        model.train(source_corpus_path=args.input, update=bool(args.stored_model))

        # Destination precedence: explicit output file, then the model that
        # was loaded, then the default file name.
        result_file = args.output_file or args.stored_model or "model.dat"
        model.save(result_file)
        print("Done. Model saved to " + result_file)
        return 0

    if action in ("word2vec", "similar"):
        # Both actions query a previously saved model for a single word.
        if not args.input:
            print("A input word is required!")
            return 1
        if not args.stored_model:
            print("A stored word2vec model is required! (--stored-model)")
            return 1

        from cla.learn.word2vec import VectorModel
        model = VectorModel(source_file_path=args.stored_model)

        if action == "word2vec":
            print(model.to_vector(args.input))
            return 0

        query = args.input
        if isinstance(query, bytes):
            # Python 2 hands over argv as byte strings; Python 3 already
            # provides text, which has no ``decode`` method.
            query = query.decode(args.encoding)
        for word, similarity in model.model.similar_by_word(query):
            if not isinstance(word, str):
                # Python 2 ``unicode`` result: encode it so it can be joined
                # with the byte-string score below, as the script always did.
                word = word.encode(args.encoding)
            print(word + "\t%.5f" % similarity)
        return 0

    print("Unsupported action: " + action)
    return 1

if __name__ == '__main__':
    # Script entry point: hand whatever main() returns to sys.exit() so it
    # becomes the process exit status.  A ``None`` return maps to status 0,
    # so a main() that returns nothing still exits successfully.
    import sys

    sys.exit(main())
