Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2017
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.83 KB | None | 0 0
  1. #!/usr/bin/python
  2. import sys
  3. from sys import argv
  4. import collections
  5. import os
  6. import re
  7. import subprocess
  8. import pp
  9.  
  # Command-line arguments, in order: the worker count (nproc) followed by
  # three file paths. In the code visible here, filename1 is the input that is
  # counted and split, and filename3 is the "key value" mapping file read by
  # headerreplace; filename2 is unpacked but not referenced anywhere else.
  # The unpacking raises ValueError unless exactly four arguments are supplied.
  10. script, nproc, filename1, filename2, filename3 = argv
  11.  
  12.  
  def count_lines(file):
      """Return the number of lines in the file at path ``file``.

      The count is done in Python rather than by parsing ``wc -l`` output:
      the previous ``out.split(' ')[1]`` picked the wrong field of that
      output (the file name, or an empty string when the count is padded
      with spaces), so ``int()`` failed on typical output.

      A final line that lacks a trailing newline is counted, so the result
      always equals ``len(open(file).readlines())``.

      Prints ``"<n> lines found."`` as a progress message.

      Raises IOError/OSError if the file cannot be opened.
      """
      # Binary mode: only line terminators matter, so skip text decoding.
      with open(file, 'rb') as handle:
          line_count = sum(1 for _ in handle)
      print("%d lines found." % line_count)
      return line_count
  20.  
  def splitFile(file, line_count):
      """Split the lines of ``file`` into four consecutive partitions.

      Args:
          file: path of the text file to read.
          line_count: number of lines in the file; a quarter of it (rounded
              down) is used as the size of the first three partitions.

      Returns:
          A 4-tuple of lists of lines (line terminators kept). The first
          three lists hold ``line_count // 4`` lines each and the last one
          holds every remaining line, so concatenating the four lists
          reproduces the file exactly.

      Prints progress messages before and after partitioning.
      """
      print("Partitioning files...")
      with open(file) as handle:
          lines = handle.readlines()

      # Floor division keeps the chunk size an integer on Python 2 and 3.
      partition = line_count // 4

      # Each slice starts exactly where the previous one ended. The earlier
      # "+ 1" start offsets skipped one line at every partition boundary.
      p1 = lines[:partition]
      p2 = lines[partition:partition * 2]
      p3 = lines[partition * 2:partition * 3]
      # The last partition absorbs the remainder, whatever line_count says.
      p4 = lines[partition * 3:]

      partitions = (p1, p2, p3, p4)
      print("%d partitions created." % len(partitions))
      return partitions
  37.  
  def headerreplace(partition, file2, file3):
      """Write ``partition`` to ``file2`` with mapped keys replaced.

      Args:
          partition: iterable of text lines (terminators included).
          file2: path of the output file; it is created or truncated.
          file3: path of the mapping file. Each non-blank line holds a key
              and its replacement separated by the first space.

      For every input line, the first mapping key found in the line has all
      of its occurrences replaced by the mapped value. Lines containing no
      key are written unchanged. Every input line is written exactly once.

      The previous implementation tracked progress with a counter that was
      never reset per line; as a result the last key was never tested,
      unmatched lines could be dropped and some lines were written twice.

      The function is deliberately self-contained (no module-level helpers)
      because it is handed to the job server for execution.

      Returns:
          The string "Finished job".

      Raises:
          ValueError: if a non-blank mapping line contains no space.
      """
      d = {}
      with open(file2, 'w') as corrected, open(file3) as f:
          # Build the key -> replacement dictionary.
          for line in f:
              line = line.rstrip()
              if not line:
                  # Blank lines carry no mapping; skip instead of crashing.
                  continue
              key, val = line.split(" ", 1)
              d[key] = val

          # Copy the partition, substituting the first key that matches.
          for line in partition:
              for key in d:
                  if key in line:
                      line = line.replace(key, d[key])
                      break
              corrected.write(line)
      return "Finished job"
  68.  
  69.  
  70.  
  # Tuple of all parallel python servers to connect with (none listed here).
  ppservers = ()

  # The worker count is a mandatory command-line argument: the argv unpacking
  # near the top of the script fails unless it is present, so the former
  # "no arguments -> auto-detect workers" branch could never run and has been
  # removed. nproc is the same value as sys.argv[1].
  ncpus = int(nproc)

  # Create the job server with ncpus workers.
  job_server = pp.Server(ncpus, ppservers=ppservers)

  print("Starting pp with %s workers" % job_server.get_ncpus())
  83.  
  84.  
  def master():
      """Count, split and process ``filename1`` through the job server.

      The input is counted and split into partitions, and one
      ``headerreplace`` job is submitted per partition. Each job is paired
      with one of four fixed output names and given ``filename3`` as its
      mapping file. Every submitted job is then called in submission order,
      and "Completed job" is printed for each one whose result is truthy.
      """
      output_names = (
          "corrected1.txt",
          "corrected2.txt",
          "corrected3.txt",
          "corrected4.txt",
      )
      chunks = splitFile(filename1, count_lines(filename1))

      # Submit everything first so the calls below only collect results.
      pending = [
          job_server.submit(headerreplace, (chunk, target, filename3))
          for chunk, target in zip(chunks, output_names)
      ]

      for fetch in pending:
          if fetch():
              print("Completed job")
  96.  
  # Run the whole pipeline as soon as the script is executed; there is no
  # __main__ guard, so importing this file also triggers it.
  97. master()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement