SHARE
TWEET

Untitled

a guest Nov 14th, 2019 92 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. Matvey Kryuchkov, [14 Nov 2019 at 22:37:39]:
  2. #!/usr/bin/env python3
  3.  
  4. import sys
  5. import random
  6.  
  7. for line in sys.stdin:
  8.     print(random.randint(1, 10**4), line.strip())
  9.  
  10. #!/usr/bin/env python3
  11.  
  12. import sys
  13. import random
  14.  
  15. current_num = random.randint(1, 5)
  16. current_count = 0
  17. current_lines = []
  18. total_count = 0
  19. for line in sys.stdin:
  20.     try:
  21.         rand, id_ = line.strip().split(' ')
  22.         current_count += 1
  23.         if (current_count == current_num):
  24.             for id1 in current_lines:
  25.                 print(id1 + ',', end='')
  26.             print(id_)
  27.             current_lines = []
  28.             current_count = 0
  29.             current_num = random.randint(1, 5)
  30.             total_count += 1
  31.         else:
  32.             current_lines.append(id_)
  33.  
  34.     except ValueError as e:
  35.         continue
  36.    
  37.     if (total_count > 50):
  38.         break
  39.    
  40. if (total_count < 50):
  41.     for id1 in current_lines:
  42.         print(id1 + ',', end='')
  43.     print(current_lines[-1])
  44.  
  45. #!/usr/bin/env bash                                                                                                                                                                                                                                                                                                                        
  46.  
  47. OUT_DIR="out"
  48. NUM_REDUCERS=5
  49.  
  50. hadoop fs -rm -r -skipTrash $OUT_DIR* > /dev/null                                                                                                                                                                                                                                                                          
  51.  
  52. yarn jar /opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-streaming.jar -D mapreduce.job.reduces=${NUM_REDUCERS} -files mapper.py,reducer.py -mapper mapper.py -reducer reducer.py -input /data/ids_part -output $OUT_DIR > /dev/null                                                                          
  53.  
  54. hdfs dfs -cat ${OUT_DIR}/part-00000
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top