Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
# Mapper is same for both equality and inequality join
"""Hadoop-streaming mapper that tags each record with its source table.

Every input line is expected to be ``key<TAB>value``.  The mapper emits
``key<TAB>value<TAB>tag`` where ``tag`` is the digit that ends the input
file's base name ('Subj1.txt' -> 1, 'Subj2.txt' -> 2).
"""
import os
import sys


def input_file_path(environ):
    """Return the path of the file this map task is reading.

    Looks up the legacy ``map_input_file`` variable first and falls back to
    ``mapreduce_map_input_file``.  Raises KeyError when neither is set.
    """
    for name in ('map_input_file', 'mapreduce_map_input_file'):
        if name in environ:
            return environ[name]
    raise KeyError('map_input_file')


def tag_from_path(path):
    """Return the integer tag encoded as the last character of the file stem.

    The extension is removed with ``os.path.splitext`` instead of a fixed
    slice, so the extension length no longer matters.  Raises ValueError when
    the stem does not end in a digit.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    return int(stem[-1:])


def map_records(lines, tag):
    """Yield ``key<TAB>value<TAB>tag`` for every well-formed input line.

    Lines without a tab separator (including blank lines) are skipped rather
    than aborting the whole map task.
    """
    for line in lines:
        line = line.strip()
        if '\t' not in line:
            continue
        key, value = line.split('\t', 1)
        yield "%s\t%s\t%s" % (key, value, tag)


def run_mapper():
    """Tag every record arriving on stdin and write the result to stdout."""
    # The input file is fixed for the whole task, so resolve the tag once.
    tag = tag_from_path(input_file_path(os.environ))
    for record in map_records(sys.stdin, tag):
        print(record)


if __name__ == '__main__':
    run_mapper()
#!/usr/bin/python
# Reducer for equality join
"""Hadoop-streaming reducer performing an equality join on an integer key.

Input lines are ``key<TAB>value<TAB>tag`` as produced by the mapper, and are
expected to arrive with equal keys adjacent to each other.  For every key the
reducer emits ``key<TAB>value1<TAB>value2`` for each pairing of a tag-1 value
with a tag-2 value.
"""
import sys
from itertools import groupby
from operator import itemgetter


def parse_tagged_line(line):
    """Split one reducer input line into ``(key, value, tag)``.

    The key is the first field and the tag is the last one; everything in
    between is the value, so values that contain tabs survive intact.
    Returns None for lines with fewer than three fields or with a
    non-integer key or tag.
    """
    fields = line.strip().split('\t')
    if len(fields) < 3:
        return None
    try:
        key = int(fields[0])
        tag = int(fields[-1])
    except ValueError:
        return None
    return key, '\t'.join(fields[1:-1]), tag


def equality_join(lines):
    """Yield joined rows for every run of consecutive lines sharing a key.

    Within one key, tag-1 values form the left side and tag-2 values the
    right side; the cross product is emitted so one-to-many and many-to-many
    matches are joined correctly.  Records with any other tag are ignored.
    """
    parsed = (parse_tagged_line(line) for line in lines)
    records = (record for record in parsed if record is not None)
    for key, group in groupby(records, key=itemgetter(0)):
        left_values = []
        right_values = []
        for _, value, tag in group:
            if tag == 1:
                left_values.append(value)
            elif tag == 2:
                right_values.append(value)
        for value1 in left_values:
            for value2 in right_values:
                yield '%s\t%s\t%s' % (key, value1, value2)


def run_equality_reducer():
    """Join the records arriving on stdin and write the rows to stdout."""
    for row in equality_join(sys.stdin):
        print(row)


if __name__ == '__main__':
    run_equality_reducer()
#!/usr/bin/python
# Reducer for inequality join
"""Hadoop-streaming reducer performing a ``key1 < key2`` inequality join.

Input lines are ``key<TAB>value<TAB>tag`` as produced by the mapper.  Every
tag-1 record is paired with every tag-2 record whose key is strictly greater,
and ``key1<TAB>key2<TAB>value1<TAB>value2`` is emitted for each pair.
"""
from operator import itemgetter  # kept from the original script; not used here
import sys


def parse_tagged_line(line):
    """Split one reducer input line into ``(key, value, tag)``.

    The key is the first field and the tag is the last one; everything in
    between is the value, so values that contain tabs survive intact.
    Returns None for lines with fewer than three fields or with a
    non-integer key or tag.
    """
    fields = line.strip().split('\t')
    if len(fields) < 3:
        return None
    try:
        key = int(fields[0])
        tag = int(fields[-1])
    except ValueError:
        return None
    return key, '\t'.join(fields[1:-1]), tag


def inequality_join(lines):
    """Yield one row per (tag-1, tag-2) record pair with ``key1 < key2``.

    Both sides are buffered in memory before any comparison is made: the
    input is a one-shot stream, so it cannot be re-scanned once per left
    record.  Rows are produced in input order of the left records and, for
    each of them, in input order of the right records.  Records with a tag
    other than 1 or 2 are ignored.
    """
    left_records = []
    right_records = []
    for line in lines:
        record = parse_tagged_line(line)
        if record is None:
            continue
        key, value, tag = record
        if tag == 1:
            left_records.append((key, value))
        elif tag == 2:
            right_records.append((key, value))
    for key1, value1 in left_records:
        for key2, value2 in right_records:
            if key1 < key2:
                yield '%s\t%s\t%s\t%s' % (key1, key2, value1, value2)


def run_inequality_reducer():
    """Join the records arriving on stdin and write the rows to stdout."""
    for row in inequality_join(sys.stdin):
        print(row)


if __name__ == '__main__':
    run_inequality_reducer()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement