SHOW:
|
|
- or go back to the newest paste.
#!/usr/bin/python
# Mapper is same for both equality and inequality join
#
# Hadoop-streaming mapper: reads "key<TAB>value" lines and re-emits them as
# "key<TAB>value<TAB>tag", where tag is the table number taken from the name
# of the input file being processed (1 for 'Subj1.txt', 2 for 'Subj2.txt').

import sys
import os


def _input_file_name(environ):
    """Return the path of the input file this map task is reading.

    The legacy variable 'map_input_file' is tried first; the newer spelling
    'mapreduce_map_input_file' is used as a fallback.  Raises KeyError when
    neither is set.
    """
    try:
        return environ['map_input_file']
    except KeyError:
        return environ['mapreduce_map_input_file']


def _tag_from_file_name(file_name):
    """Return the integer table tag encoded at the end of the file's stem.

    'Subj1.txt' -> 1 and 'Subj2.txt' -> 2.  The digits are located relative
    to the extension instead of at a fixed character offset, so multi-digit
    suffixes and extensions of any length work as well.  Raises ValueError
    when the stem does not end in a digit.
    """
    stem = os.path.splitext(os.path.basename(file_name))[0]
    digits = stem[len(stem.rstrip('0123456789')):]
    return int(digits)


def run_mapper(stdin=None, stdout=None, environ=None):
    """Tag every "key<TAB>value" line from stdin and write it to stdout.

    stdin, stdout and environ default to sys.stdin, sys.stdout and
    os.environ; they are parameters only so the mapper can be driven
    without a Hadoop task environment.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    environ = os.environ if environ is None else environ

    tag = None
    for line in stdin:
        if tag is None:
            # Resolved once, on the first line: the input file is constant
            # for the whole task, and an empty input never needs it.
            tag = _tag_from_file_name(_input_file_name(environ))
        line = line.strip()
        if '\t' not in line:
            # Blank or malformed line: skip it rather than abort the task.
            continue
        key, value = line.split('\t', 1)
        stdout.write("%s\t%s\t%s\n" % (key, value, tag))


if __name__ == "__main__":
    run_mapper()
13 | ||
#!/usr/bin/python
# Reducer for equality join
#
# Hadoop-streaming reducer: reads "key<TAB>value<TAB>tag" records and, for
# every key, writes "key<TAB>value1<TAB>value2" for each pairing of a tag-1
# value with a value carrying any other tag.  Records with the same key must
# arrive consecutively.

from itertools import groupby
from operator import itemgetter
import sys


def _parse_record(line):
    """Split one input line into (int key, value, int tag).

    The key is the first tab-separated field and the tag is the last one;
    everything in between is the value, so values that themselves contain
    tabs are kept intact.  Returns None for a line that has fewer than three
    fields or whose key or tag is not an integer.
    """
    line = line.strip()
    try:
        key, rest = line.split('\t', 1)
        value, tag = rest.rsplit('\t', 1)
        return int(key), value, int(tag)
    except ValueError:
        return None


def _valid_records(lines):
    """Lazily yield the parsed form of every well-formed line."""
    for line in lines:
        record = _parse_record(line)
        if record is not None:
            yield record


def run_equality_reducer(stdin=None, stdout=None):
    """Join tag-1 values with the other table's values on equal keys.

    stdin and stdout default to sys.stdin and sys.stdout.  Keys with records
    from only one side produce no output.  Output lines appear in key
    arrival order.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    for key, group in groupby(_valid_records(stdin), key=itemgetter(0)):
        left_values = []
        right_values = []
        for _, value, tag in group:
            # Tag 1 is the left table; any other tag counts as the right one.
            if tag == 1:
                left_values.append(value)
            else:
                right_values.append(value)
        # Pair only across tables, so same-table duplicates are never
        # joined with each other.
        for value1 in left_values:
            for value2 in right_values:
                stdout.write('%s\t%s\t%s\n' % (key, value1, value2))


if __name__ == "__main__":
    run_equality_reducer()
50 | ||
#!/usr/bin/python
# Reducer for inequality join
#
# Hadoop-streaming reducer: reads "key<TAB>value<TAB>tag" records and writes
# "key1<TAB>key2<TAB>value1<TAB>value2" for every pair of a tag-1 record and
# a tag-2 record with key1 < key2.

from operator import itemgetter  # not used here; kept from the original script
import sys


def _parse_record(line):
    """Split one input line into (int key, value, int tag).

    The key is the first tab-separated field and the tag is the last one;
    everything in between is the value, so values that themselves contain
    tabs are kept intact.  Returns None for a line that has fewer than three
    fields or whose key or tag is not an integer.
    """
    line = line.strip()
    try:
        key, rest = line.split('\t', 1)
        value, tag = rest.rsplit('\t', 1)
        return int(key), value, int(tag)
    except ValueError:
        return None


def run_inequality_reducer(stdin=None, stdout=None):
    """Emit every (tag-1, tag-2) record pair whose keys satisfy key1 < key2.

    stdin and stdout default to sys.stdin and sys.stdout.  The input stream
    can only be read once, so both sides are buffered in memory before any
    pair is produced; nesting a second loop over the same stream would
    exhaust it while handling the first tag-1 record.  Records with any tag
    other than 1 or 2 are ignored.  Pairs are written in arrival order of
    the tag-1 records, then arrival order of the tag-2 records.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    left_records = []
    right_records = []
    for line in stdin:
        record = _parse_record(line)
        if record is None:
            continue
        key, value, tag = record
        if tag == 1:
            left_records.append((key, value))
        elif tag == 2:
            right_records.append((key, value))

    for key1, value1 in left_records:
        for key2, value2 in right_records:
            if key1 < key2:
                stdout.write('%s\t%s\t%s\t%s\n' % (key1, key2, value1, value2))


if __name__ == "__main__":
    run_inequality_reducer()