Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ##################################################
- #
- # chuli.py
- # chu li is Chinese for processing
- #
- #
- ##################################################
- import subprocess
- import os
- from put import lazyWrite
class chuli:
    """Performs chu li (processing) on a data set.

    An instance is built from a "down" directory and an "up" directory.
    run() walks the down directory and applies the per-file steps listed in
    _act() to every file found.
    """

    def __init__(self, downDir, upDir):
        """Remember the directories to work on.

        Note that downDir and upDir should be absolute paths.
        """
        self.downDir = downDir
        self.upDir = upDir
        self.downFinalDir = "downFinal"

    def run(self):
        """Co-ordinate this class: process everything under downDir."""
        # The walking is done once, inside act(), so that every per-file
        # function shares a single traversal of the tree.
        # NOTE(review): upDir is stored but never processed here - confirm
        # whether an equivalent call for the up direction is intended.
        self.act(self.downDir, self.downFinalDir)

    def act(self, fname, UorDFinalDir):
        """Manage the walking; walk once only for all the functions.

        If fname is a directory, every file below it (recursively) is handed
        to _act(); otherwise fname itself is handed to _act().
        """
        if os.path.isdir(fname):
            # The sub-directory list is unused; os.walk recurses on its own.
            for root, _subdirs, files in os.walk(fname):
                for single_file in files:
                    self._act(os.path.join(root, single_file), UorDFinalDir)
        else:
            self._act(fname, UorDFinalDir)

    def _act(self, singlef, UorDFinalDir):
        """Workhorse of act(); add functions here if you want them to be
        applied to each individual file during os.walk.

        UorDFinalDir is accepted but not used by the steps currently listed.
        """
        self.sort(singlef)

    def sort(self, singlef):
        """Sort a SINGLE FILE in place, dropping duplicate lines.

        Runs the external ``sort -u`` on singlef, writing to a sibling file
        whose extension is replaced by "sort", then moves that file back over
        singlef with ``mv``.

        Raises Exception if either external command returns non-zero.
        """
        sortName = os.extsep.join([os.path.splitext(singlef)[0], "sort"])
        if sortName == singlef:
            # The input already ends in ".sort": opening it for writing would
            # truncate it before `sort` reads it, so use a distinct name.
            sortName = os.extsep.join([singlef, "tmp"])
        # NOTE(review): an unrelated file that already has the scratch name
        # would be overwritten here - confirm the data set never has one.
        with open(sortName, 'w') as sorted_out:
            ret = subprocess.call(["sort", "-u", singlef], stdout=sorted_out)
        if ret != 0:
            raise Exception("sort calling on " + singlef + " returns non zero")
        ret = subprocess.call(["mv", sortName, singlef])
        if ret != 0:
            # A failed move would otherwise silently leave singlef unsorted.
            raise Exception("mv calling on " + sortName + " returns non zero")

    def toFinal(self, singlef, UorDFinalDir):
        """Read a file of "link1 link2" lines and output it using lazyWrite.

        Each line is split on a single space into exactly two fields.
        Whenever link1 differs from the previous line's link1, a header of
        the form newline + link1 + ":" + newline is written; link2 (which
        still carries its trailing newline) is written for every line.

        NOTE THAT the first thing written is therefore a newline (does that
        matter?).

        Raises ValueError if a line does not split into exactly two fields.
        """
        # NOTE(review): getWriteDir is not defined in the visible part of
        # this class - confirm where it comes from before calling toFinal.
        writeAdr = self.getWriteDir(singlef, UorDFinalDir)
        last_link = None
        # The context manager closes the file even if a line fails to parse.
        with open(singlef, 'r') as f:
            for line in f:
                (link1, link2) = line.split(' ')
                if last_link != link1:  # a new link1
                    last_link = link1
                    lazyWrite(writeAdr, '\n' + link1 + ':' + '\n')
                lazyWrite(writeAdr, link2)  # link2 has a \n at the end
if __name__ == "__main__":
    # Command-line entry point: chuli.py <downlinkDir> <uplinkDir>
    import sys

    if len(sys.argv) != 3:
        # Report usage errors on stderr with a non-zero status so calling
        # scripts can detect them; sys.stderr.write/sys.exit work on both
        # Python 2 and Python 3, unlike the print statement and site's exit().
        sys.stderr.write("Usage: python chuli.py [downlinkDir] [uplinkDir]\n")
        sys.exit(1)
    # The class expects absolute paths, so normalise both arguments first.
    downDir = os.path.abspath(sys.argv[1])
    upDir = os.path.abspath(sys.argv[2])
    myChuli = chuli(downDir, upDir)
    myChuli.run()
Add Comment
Please, Sign In to add comment