Advertisement
Guest User

Untitled

a guest
Nov 28th, 2012
532
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Mapper is same for both equality and inequality join
  2. #!/usr/bin/python
  3.  
  4. import sys
  5. import os
  6.  
  7. for line in sys.stdin:
  8.     fileName = os.environ['map_input_file']
  9.     tag = int(fileName[-5:-4]) #hardcoded to get '1' from 'Subj1.txt' and '2' from 'Subj2.txt'
  10.     line = line.strip()
  11.     [key,value] = line.split('\t',1)
  12.     print "%s\t%s\t%s" %(key,value,tag)
  13.  
  14. # Reducer for equality join
  15. #!/usr/bin/python
  16.  
  17. from operator import itemgetter
  18. import sys
  19.  
  20. current_key=None
  21. current_value=''
  22. key=None
  23. current_tag=None
  24. tag=None
  25.  
  26. for line in sys.stdin:
  27.     line=line.strip()
  28.     key,value,tag=line.split('\t',2)
  29.     try:
  30.     key=int(key)
  31.         tag=int(tag)
  32.     except ValueError:
  33.     continue
  34.    
  35.     if current_key==key:
  36.         if current_tag==1:
  37.             value1=current_value
  38.             value2=value
  39.         else:
  40.             value1=value
  41.             value2=current_value
  42.  
  43.         current_value=value1+'\t'+value2
  44.         print '%s\t%s' %(current_key,current_value)
  45.     else:
  46.         current_value=value
  47.  
  48.     current_key=key
  49.     current_tag=tag
  50.  
  51. # Reducer for inequality join
  52. #!/usr/bin/python
  53.  
  54. from operator import itemgetter
  55. import sys
  56.  
  57. key1=None
  58. key2=None
  59.  
  60. for line1 in sys.stdin:
  61.     line1=line1.strip()
  62.     key1,value1,tag1=line1.split('\t',2)
  63.     try:
  64.     key1=int(key1)
  65.         tag1=int(tag1)
  66.     except ValueError:
  67.     continue
  68.     if tag1==1:    
  69.         for line2 in sys.stdin:
  70.             line2=line2.strip()
  71.             key2,value2,tag2=line2.split('\t',2)
  72.             try:
  73.                 key2=int(key2)
  74.                 tag2=int(tag2)
  75.             except ValueError:
  76.                 continue
  77.             if key1<key2 and tag2==2:
  78.                 print '%s\t%s\t%s\t%s' %(key1,key2,value1,value2)
  79.     else:
  80.         continue
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement