Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- __author__ = 'uolter'
- import map_reduce
def mapper(input_key, input_value):
    """Map one input record to a list of ``(cluster, score)`` pairs.

    :param input_key: key of the record; it is not used by the mapping.
    :param input_value: string of ``<cluster>:<value>`` entries separated
        by ``;``. Blank entries (for example after a trailing ``;`` or a
        trailing newline) are ignored.
    :return: list of ``(cluster, score)`` tuples, one per non-blank entry,
        where score is an int between 0 and 4.
    :raises IndexError: if a non-blank entry contains no ``:``.
    :raises ValueError: if a value cannot be parsed as a float.
    """
    import math

    # (lower bound, score) pairs, checked from the highest bound down.
    buckets = ((0.80, 4), (0.60, 3), (0.40, 2), (0.20, 1))

    def cut_and_clean_value(cluster):
        """
        :param cluster: string in the format <cluster>:<value>
        :return: tuple of cluster and score. If value is NaN the score is 0
        """
        ret = cluster.split(":")
        # float() tolerates surrounding whitespace, so a value that still
        # carries the line's trailing newline parses fine.
        val = float(ret[1])
        if math.isnan(val):
            # Every spelling of NaN scores 0, not only the exact 'NaN'.
            return ret[0], 0
        val = round(val, 2)
        for lower_bound, score in buckets:
            if val >= lower_bound:
                return ret[0], score
        return ret[0], 0

    return [
        cut_and_clean_value(c)
        for c in input_value.split(";")
        if c.strip()  # skip blank entries instead of failing on ret[1]
    ]
def reducer(intermediate_key, intermediate_value_list):
    """Collapse every value gathered for one key into a single total.

    :param intermediate_key: key the values were grouped under.
    :param intermediate_value_list: iterable of numeric values for that key.
    :return: tuple of the key and the sum of its values.
    """
    total = sum(intermediate_value_list)
    return (intermediate_key, total)
def main(input_file='text/input.txt'):
    """Read the input file, run the map/reduce job and print its result.

    Each non-blank line is treated as a fixed-width record: the first nine
    characters are the user id, the character at index 9 is skipped
    (presumably a separator -- confirm against the data), and everything
    from index 10 onwards is the value handed to ``mapper``.

    :param input_file: path of the file to read; defaults to the path the
        script has always used.
    """
    records = {}
    with open(input_file) as source:
        # Iterate lazily instead of loading every line with readlines().
        for line in source:
            line = line.rstrip("\r\n")
            if not line.strip():
                # A blank line would become an empty record and break
                # the mapper, so it is skipped.
                continue
            # A repeated user id overwrites the earlier record, as before.
            records[line[:9]] = line[10:]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(map_reduce.map_reduce(records, mapper, reducer))
# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement