SHOW:
|
|
- or go back to the newest paste.
# Mapper is same for both equality and inequality join
#!/usr/bin/python

import sys
import os


def input_file_name(environ):
    """Return the path of the input file this map task is reading.

    The path is taken from the ``map_input_file`` entry of *environ*; if
    that entry is absent, ``mapreduce_map_input_file`` (the name used by
    newer Hadoop releases for the same setting) is tried.

    Raises:
        KeyError: if neither variable is present.
    """
    for name in ('map_input_file', 'mapreduce_map_input_file'):
        if name in environ:
            return environ[name]
    raise KeyError('map_input_file')


def source_tag(file_name):
    """Return the numeric tag identifying which input a record came from.

    Hardcoded to get 1 from 'Subj1.txt' and 2 from 'Subj2.txt': the tag is
    the single character located five positions from the end of the name,
    i.e. the one just before a three-letter extension.

    Raises:
        ValueError: if that character is not a digit.
    """
    return int(file_name[-5:-4])


def tag_record(line, tag):
    """Return ``key<TAB>value<TAB>tag`` for one input line.

    The line is stripped and split on the first tab only, so the value may
    itself contain tabs. Lines without a tab (including blank lines) cannot
    be split into key and value; ``None`` is returned for them so that the
    caller can skip them instead of crashing.
    """
    fields = line.strip().split('\t', 1)
    if len(fields) != 2:
        return None
    key, value = fields
    return "%s\t%s\t%s" % (key, value, tag)


def run_mapper():
    """Tag every record read from stdin and write it to stdout."""
    tag = None
    for line in sys.stdin:
        if tag is None:
            # The input file does not change while this process runs, so
            # the tag is resolved once, on the first line, not per record.
            tag = source_tag(input_file_name(os.environ))
        record = tag_record(line, tag)
        if record is not None:
            sys.stdout.write(record + '\n')


if __name__ == '__main__':
    run_mapper()
| 13 | ||
# Reducer for equality join
#!/usr/bin/python

from itertools import groupby
from operator import itemgetter
import sys


def _parse_equality_record(line):
    """Parse ``key<TAB>value<TAB>tag`` into ``(int key, value, int tag)``.

    The key is the first field and the tag is the last one; everything in
    between is the value, so a value that itself contains tabs is kept
    intact. Returns ``None`` for lines with fewer than three fields or with
    a non-integer key or tag.
    """
    fields = line.strip().split('\t')
    if len(fields) < 3:
        return None
    try:
        return int(fields[0]), '\t'.join(fields[1:-1]), int(fields[-1])
    except ValueError:
        return None


def equality_join(lines):
    """Yield ``key<TAB>value1<TAB>value2`` for every matching record pair.

    *lines* is an iterable of tagged records in which records sharing a key
    are adjacent. Within each run of equal keys, values tagged 1 form the
    left side and values with any other tag form the right side; one output
    line is produced for each (left, right) combination. A key that appears
    on only one side produces nothing. Malformed lines are skipped.

    Only the values of the key currently being processed are held in memory.
    """
    parsed = (_parse_equality_record(line) for line in lines)
    records = (record for record in parsed if record is not None)
    for key, group in groupby(records, key=itemgetter(0)):
        left_values = []
        right_values = []
        for _, value, tag in group:
            if tag == 1:
                left_values.append(value)
            else:
                right_values.append(value)
        for value1 in left_values:
            for value2 in right_values:
                yield '%s\t%s\t%s' % (key, value1, value2)


def run_equality_reducer():
    """Join the records read from stdin and write the result to stdout."""
    for joined in equality_join(sys.stdin):
        sys.stdout.write(joined + '\n')


if __name__ == '__main__':
    run_equality_reducer()
| 50 | ||
# Reducer for inequality join
#!/usr/bin/python

from operator import itemgetter
import sys


def _parse_inequality_record(line):
    """Parse ``key<TAB>value<TAB>tag`` into ``(int key, value, int tag)``.

    The key is the first field and the tag is the last one; everything in
    between is the value, so a value that itself contains tabs is kept
    intact. Returns ``None`` for lines with fewer than three fields or with
    a non-integer key or tag.
    """
    fields = line.strip().split('\t')
    if len(fields) < 3:
        return None
    try:
        return int(fields[0]), '\t'.join(fields[1:-1]), int(fields[-1])
    except ValueError:
        return None


def inequality_join(lines):
    """Yield ``key1<TAB>key2<TAB>value1<TAB>value2`` where ``key1 < key2``.

    Records tagged 1 form the left side and records tagged 2 the right
    side; every (left, right) pair whose left key is strictly smaller than
    the right key produces one output line. Records with any other tag and
    malformed lines are skipped.

    Both sides are buffered before pairing, because a single pass over a
    stream cannot revisit earlier records: iterating the same stream in a
    nested loop would exhaust it while handling the first left record.
    The join therefore only covers the records given in *lines*.
    """
    left = []
    right = []
    for line in lines:
        record = _parse_inequality_record(line)
        if record is None:
            continue
        key, value, tag = record
        if tag == 1:
            left.append((key, value))
        elif tag == 2:
            right.append((key, value))

    for key1, value1 in left:
        for key2, value2 in right:
            if key1 < key2:
                yield '%s\t%s\t%s\t%s' % (key1, key2, value1, value2)


def run_inequality_reducer():
    """Join the records read from stdin and write the result to stdout."""
    for joined in inequality_join(sys.stdin):
        sys.stdout.write(joined + '\n')


if __name__ == '__main__':
    run_inequality_reducer()