#!/usr/bin/env python

# Copyright (c) 2011-2016 Timothy Savannah under GPLv3, All Rights Reserved. See LICENSE for more information
"""
Disttask is a utility which provides the ability to distribute a task across a fixed number of processes, for better utilization of multiprocessing.

Use it with existing single-threaded/process tools and scripts to take full advantage of your computer's resources.

"""

import os
import sys
import subprocess
import threading
import time

from collections import deque

__version__ = '2.0.1'

__version_tuple__ = (2, 0, 1)

# Compatibility shim: guarantee that the name "bytes" exists on every
# interpreter, then pick a "tostr" implementation suited to it.
try:
    bytes
except NameError:
    bytes = str # Python < 2.6

if bytes == str:
    # Python 2, no additional decoding necessary.
    tostr = str
else:
    # Python 3, additional decoding necessary
    try:
        defaultEncoding = sys.getdefaultencoding()
    except Exception:
        defaultEncoding = 'utf-8'

    def tostr(x):
        """
        Convert *x* to a native string.

        @param x - Any object. Non-bytes values are passed through str().
          bytes values are decoded using the interpreter's default encoding.

        @return <str> - The text form of *x*.

        Bytes which are not valid in the default encoding are substituted
        with the Unicode replacement character instead of raising
        UnicodeDecodeError, so arbitrary program output can always be shown.
        """
        if isinstance(x, bytes) is False:
            return str(x)
        # 'replace' keeps one undecodable byte from raising out of the caller.
        return x.decode(defaultEncoding, 'replace')
        

class StdoutWriter(threading.Thread):
    """
    Thread which writes queued chunks of output to sys.stdout, one at a time,
    in the order they were queued.

    Producers hand over data with addData(). Setting the "keepGoing"
    attribute to False asks the thread to finish; anything still queued is
    written out before run() returns.
    """

    def __init__(self, *args, **kwargs):
        """
        All arguments are forwarded untouched to threading.Thread.
        """
        threading.Thread.__init__(self, *args, **kwargs)

        # While True, run() keeps polling even when the queue is empty.
        self.keepGoing = True

        # FIFO of chunks waiting to be written.
        self.stdoutData = deque()

    def addData(self, data):
        """
        Queue one chunk for writing.

        @param data - Chunk to write; it is passed through tostr() at write time.
        """
        self.stdoutData.append(data)

    def run(self):
        """
        Poll the queue, writing and flushing each chunk, until keepGoing is
        no longer True and the queue is empty.
        """
        time.sleep(.001) # Pause immediately whilst setup happens

        pending = self.stdoutData
        while self.keepGoing is True or pending:
            # Empty everything queued so far, flushing after each chunk.
            while pending:
                chunk = pending.popleft()
                sys.stdout.write(tostr(chunk))
                sys.stdout.flush()

            # Short nap between polls.
            time.sleep(.0005)

class Runner(threading.Thread):
    """
    Thread which runs a single shell command and hands its complete output
    (stdout with stderr merged in) to a writer once the command has exited.
    """

    def __init__(self, cmd, stdoutWriter):
        """
        @param cmd <str> - Command line to execute. It is run through the shell.

        @param stdoutWriter - Object providing addData(data); it receives the
          captured output as a single chunk.
        """
        threading.Thread.__init__(self)
        self.cmd = cmd
        self.stdoutWriter = stdoutWriter

    def run(self):
        """
        Execute the command, wait for it to finish, and queue its output.
        """
        pipe = subprocess.Popen(self.cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # communicate() reads until EOF, waits for the process to exit and
        # releases the pipe, rather than leaving the descriptor open until the
        # Popen object happens to be garbage collected.
        output = pipe.communicate()[0]
        self.stdoutWriter.addData(output)

class DistTask(object):
    """
    Scheduler which runs a command template once per queued item, keeping at
    most a fixed number of commands running at the same time.

    Each command runs in a Runner thread occupying one numbered "slot"
    (0 .. concurrent_tasks - 1). In the template, '%s' is replaced by the item
    and '%d' by the slot number. '%s' is substituted first, so a '%d' that is
    part of the item itself is also replaced by the slot number.
    """

    def __init__(self, cmd, concurrent_tasks, argset, stdoutWriter, endWhenDone=True):
        """
        @param cmd <str> - Command template containing '%s' (and optionally '%d').

        @param concurrent_tasks <int> - Number of slots, i.e. the maximum number
          of commands running at once.

        @param argset - Iterable of items (strings) to process initially.

        @param stdoutWriter - Object handed to every Runner to collect output.
          Its "keepGoing" attribute is set to False when run() finishes.

        @param endWhenDone <bool> default True - If True, run() returns once the
          queue is empty and every command has finished. If False, run() also
          waits until the "keepGoing" attribute has been set to False, so that
          more items can be queued while it is running.
        """
        self.cmd = cmd
        self.concurrent_tasks = concurrent_tasks
        self.argset = deque(argset)
        self.stdoutWriter = stdoutWriter
        self.endWhenDone = endWhenDone

        # keepGoing is only an attribute when endWhenDone is False
        if self.endWhenDone is False:
            self.keepGoing = True

    def addToArgset(self, items):
        """
        Append every item of an iterable to the end of the queue.
        """
        self.argset += items

    def addItemToArgset(self, item):
        """
        Append a single item to the end of the queue.
        """
        self.argset.append(item)

    def _startRunner(self, slot, item):
        """
        Build the command for *item* in slot number *slot*, start a Runner
        thread for it, and return that thread.
        """
        cmd = self.cmd.replace('%s', item).replace('%d', str(slot))
        runner = Runner(cmd, self.stdoutWriter)
        runner.start()
        return runner

    def run(self):
        """
        Process the queue until finished (see "endWhenDone" on __init__), then
        set stdoutWriter.keepGoing to False.
        """
        argset = self.argset
        stdoutWriter = self.stdoutWriter
        endWhenDone = self.endWhenDone is True

        # One entry per slot: None until first used, afterwards the most
        # recent Runner started in that slot. Local to this call, so run()
        # does not depend on (or grow) any module-level state.
        pipes = [None] * self.concurrent_tasks

        # -1 means "no pass made yet", which forces at least one pass.
        pipesRunning = -1

        while True:
            if endWhenDone:
                if pipesRunning == 0:
                    break
            elif not (self.keepGoing is True or len(argset) > 0 or pipesRunning > 0):
                break

            pipesRunning = 0
            for i in range(self.concurrent_tasks):
                current = pipes[i]

                # is_alive() is used because Thread.isAlive() no longer
                # exists on Python 3.9 and later.
                if current is not None and current.is_alive():
                    pipesRunning += 1
                    continue

                if len(argset) == 0:
                    # Slot is free but there is nothing to give it right now.
                    continue

                nextItem = argset.popleft()
                if current is not None:
                    current.join() # cleanup
                pipes[i] = self._startRunner(i, nextItem)
                pipesRunning += 1

            time.sleep(.0002)

        stdoutWriter.keepGoing = False

# Command-line entry point: disttask [cmd] [concurrent tasks] [argset]
if (__name__ == "__main__"):
    if '--version' in sys.argv[1:3]:
        sys.stderr.write('Disttask version %s\n' %(__version__,))
        sys.exit(0)

    if len(sys.argv) < 4 or '--help' in sys.argv[1:3]:
        sys.stderr.write("Usage: " + os.path.basename(sys.argv[0]) + " [cmd] [concurrent tasks] [argset]\n\n")
        sys.stderr.write("Use a %s in [cmd] where you want the args to go. use %d for the pipe number.\nTo run a list of commands, make '%s' be your full command.\n\n")
        sys.stderr.write("If argset is '--', the items will be read from stdin instead of providing the arguments to disttask.\n  Execution will start immediately, so you can have disttask manage processing items that another program is feeding in.\n")
        sys.stderr.write("\nDisttask version " + __version__ + "\n")
        sys.exit(1)


    # Module-level list of worker slots. It stays defined here because
    # DistTask.run may resolve the name "pipes" as a global.
    pipes = []


    cmd = sys.argv[1]
    if cmd.find('%s') == -1:
        sys.stderr.write("No %s in command!\n")
        sys.exit(1)

    # Validate the task count before any thread is started, so that a bad
    # value gives a clear message instead of a traceback, a hang, or a run
    # which silently does nothing.
    try:
        concurrent_tasks = int(sys.argv[2])
    except ValueError:
        sys.stderr.write("Invalid number of concurrent tasks: %s\n" %(sys.argv[2],))
        sys.exit(1)
    if concurrent_tasks < 1:
        sys.stderr.write("Number of concurrent tasks must be at least 1.\n")
        sys.exit(1)
    argset = sys.argv[3:]

    stdoutWriter = StdoutWriter()
    stdoutWriter.start()

    try:
        if len(argset) == 1 and argset[0] == '--':
            # Items arrive on stdin, one per line. The scheduler runs in its
            # own thread so that work starts while items are still arriving.
            runner = DistTask(cmd, concurrent_tasks, [], stdoutWriter, endWhenDone=False)
            runnerThread = threading.Thread(target=runner.run)
            runnerThread.start()
            try:
                while True:
                    try:
                        nextItem = sys.stdin.readline()
                    except (Exception, KeyboardInterrupt):
                        # Best effort: stop reading, but still let whatever
                        # has been queued so far run to completion.
                        break
                    if nextItem == '':
                        # End of input
                        break
                    # Strip only the line terminator. A final line without a
                    # trailing newline must keep its last character.
                    if nextItem.endswith('\n'):
                        nextItem = nextItem[:-1]
                    runner.addItemToArgset(nextItem)
            finally:
                # Always release the scheduler thread, else it polls forever.
                runner.keepGoing = False
            runnerThread.join()
        else:
            runner = DistTask(cmd, concurrent_tasks, argset, stdoutWriter, endWhenDone=True)
            runner.run()
    finally:
        # The writer is a non-daemon thread which loops while keepGoing is
        # True. Clear the flag even on failure so the process can exit.
        stdoutWriter.keepGoing = False

# vim: set ts=4 sw=4 expandtab
