View difference between Paste ID: kEJwd2u1 and 8Lug6Q1V
SHOW: | | - or go back to the newest paste.
1
# Mapper is same for both equality and inequality join
#!/usr/bin/python

import sys
import os


def input_file_name(environ):
    """Return the path of the input file currently being mapped.

    The original script read the ``map_input_file`` environment variable;
    that name is tried first.  ``mapreduce_map_input_file`` is accepted as a
    fallback (assumed to be the name exported by newer Hadoop releases --
    see the Hadoop Streaming documentation).

    Raises:
        KeyError: if neither variable is present in *environ*.
    """
    for name in ('map_input_file', 'mapreduce_map_input_file'):
        if name in environ:
            return environ[name]
    raise KeyError('map_input_file')


def tag_from_path(path):
    """Return the table tag encoded in the input file name.

    Hardcoded to get 1 from 'Subj1.txt' and 2 from 'Subj2.txt': the tag is
    the single character right before a 4-character extension.

    Raises:
        ValueError: if that character is not a digit.
    """
    return int(path[-5:-4])


def map_records(lines, environ):
    """Yield ``key<TAB>value<TAB>tag`` for every ``key<TAB>value`` line.

    Args:
        lines: iterable of raw input lines.
        environ: mapping holding the input-file variable (``os.environ``).

    Lines without a tab separator (blank lines included) are skipped rather
    than aborting the whole task.  The tag is resolved once, on the first
    line, instead of once per record.
    """
    tag = None
    for raw in lines:
        if tag is None:
            tag = tag_from_path(input_file_name(environ))
        key, sep, value = raw.strip().partition('\t')
        if not sep:
            # No tab: not a key/value record.
            continue
        yield "%s\t%s\t%s" % (key, value, tag)


if __name__ == '__main__':
    for record in map_records(sys.stdin, os.environ):
        sys.stdout.write(record + '\n')
13
14
# Reducer for equality join
#!/usr/bin/python

from operator import itemgetter
import sys


def split_tagged_line(line):
    """Parse one ``key<TAB>value<TAB>tag`` line.

    The key is everything before the first tab and the tag everything after
    the last one, so a value that itself contains tabs stays intact.

    Returns:
        ``(int(key), value, int(tag))``, or ``None`` when the line has fewer
        than three fields or the key/tag is not an integer.
    """
    key, first_sep, rest = line.strip().partition('\t')
    value, last_sep, tag = rest.rpartition('\t')
    if not first_sep or not last_sep:
        return None
    try:
        return int(key), value, int(tag)
    except ValueError:
        return None


def joined_rows(key, left_values, right_values):
    """Return ``key<TAB>left<TAB>right`` for every left/right combination."""
    return ['%s\t%s' % (key, left + '\t' + right)
            for left in left_values
            for right in right_values]


def equality_join(lines):
    """Yield the equi-join of the two tagged tables found in *lines*.

    Records with the same key must be adjacent in *lines* (only consecutive
    runs of a key are grouped).  Within a run, tag 1 values form the left
    side and every other tag the right side, independent of arrival order.
    Rows of the same table are never paired with each other, and a key with
    several matches yields one output row per left/right pair.  Unparseable
    lines are ignored and do not break up a run.
    """
    current_key = None
    left_values = []
    right_values = []
    for line in lines:
        parsed = split_tagged_line(line)
        if parsed is None:
            continue
        key, value, tag = parsed
        if key != current_key:
            # Key changed: emit the finished group and start a new one.
            for row in joined_rows(current_key, left_values, right_values):
                yield row
            current_key = key
            left_values = []
            right_values = []
        if tag == 1:
            left_values.append(value)
        else:
            right_values.append(value)
    # Emit the last group.
    for row in joined_rows(current_key, left_values, right_values):
        yield row


if __name__ == '__main__':
    for output_row in equality_join(sys.stdin):
        sys.stdout.write(output_row + '\n')
50
51
# Reducer for inequality join
#!/usr/bin/python

from operator import itemgetter
import sys


def parse_join_record(line):
    """Parse one ``key<TAB>value<TAB>tag`` line.

    The key is taken before the first tab and the tag after the last tab, so
    values containing tabs are preserved.

    Returns:
        ``(int(key), value, int(tag))``, or ``None`` for lines with fewer
        than three fields or a non-integer key/tag.
    """
    key, first_sep, rest = line.strip().partition('\t')
    value, last_sep, tag = rest.rpartition('\t')
    if not first_sep or not last_sep:
        return None
    try:
        return int(key), value, int(tag)
    except ValueError:
        return None


def inequality_join(lines):
    """Yield ``key1<TAB>key2<TAB>value1<TAB>value2`` for all key1 < key2.

    ``key1``/``value1`` come from tag 1 records, ``key2``/``value2`` from
    tag 2 records; other tags and unparseable lines are ignored.

    The input is a one-shot stream, so both sides are buffered first and then
    compared.  Iterating the stream again inside the loop (as a nested
    ``for ... in sys.stdin`` would) consumes it and leaves every left record
    after the first one without a join partner.

    NOTE(review): the result is only complete if this reducer sees every
    record of both tables -- presumably the job runs with a single reducer;
    confirm against the job configuration.
    """
    left_records = []
    right_records = []
    for line in lines:
        parsed = parse_join_record(line)
        if parsed is None:
            continue
        key, value, tag = parsed
        if tag == 1:
            left_records.append((key, value))
        elif tag == 2:
            right_records.append((key, value))

    for key1, value1 in left_records:
        for key2, value2 in right_records:
            if key1 < key2:
                yield '%s\t%s\t%s\t%s' % (key1, key2, value1, value2)


if __name__ == '__main__':
    for output_row in inequality_join(sys.stdin):
        sys.stdout.write(output_row + '\n')