Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Rating statistics for a tab-separated ratings file.
#
# Reads `filename`, where every row is expected to hold the columns named in
# `stolpci` (user_id, item_id, rating, timestamp), then:
#   1. computes the average rating of every film that has at least 20 ratings,
#   2. writes the rows belonging to those films to `data_out`,
#   3. prints the 10 worst / best rated films and the 10 least / most
#      frequent keys of `stevilo_ogledanih_filmov`.
#
# The code runs unchanged on Python 2 and Python 3: every print call takes a
# single argument and dict iteration goes through `.items()`.
__author__ = 'rok'

from collections import defaultdict
import operator

# How many entries each "top" / "bottom" listing shows.
TOP_N = 10

filename = "u.data"

# Close the input file deterministically and skip blank lines: a trailing
# empty line would otherwise raise IndexError when the row is split below.
with open(filename) as f_in:
    lines = [row for row in (raw.strip() for raw in f_in) if row]

ocene_filmov = defaultdict(list)            # item_id -> list of int ratings
stevilo_ogledanih_filmov = defaultdict(int)  # user_id -> number of rating rows
for line in lines:
    tokens = line.split("\t")
    ocene_filmov[tokens[1]].append(int(tokens[2]))
    # NOTE(review): this counter is keyed by user_id (column 0), yet it is
    # later printed under "Gledanost filmov" (film viewership). If per-film
    # counts were intended the key should be tokens[1] -- confirm with the
    # author before changing; behaviour is kept as-is here.
    stevilo_ogledanih_filmov[tokens[0]] += 1

stevilo_filmov_nad_20 = 0
out = "povprecne_ocene.out"  # not referenced below; kept for compatibility
data_out = "data.tab"

# Average rating per film, restricted to films with at least 20 ratings.
povprecne_ocene = {}
for key, value in ocene_filmov.items():
    if len(value) >= 20:
        stevilo_filmov_nad_20 += 1
        # float() keeps true division on Python 2 as well.
        povprecne_ocene[key] = sum(value) / float(len(value))

stolpci = ["user_id", "item_id", "rating", "timestamp"]
tipi = ["d", "d", "c", "d"]
with open(data_out, "w") as f_out:
    # Three header rows: column names, column type codes, and an empty third
    # row (presumably the flags row of Orange's .tab format -- TODO confirm).
    # Every cell keeps its trailing tab so the output stays byte-identical.
    f_out.write("".join(stolpec + "\t" for stolpec in stolpci) + "\n")
    f_out.write("".join(tip + "\t" for tip in tipi) + "\n")
    f_out.write("\n")
    for line in lines:
        tokens = line.split("\t")
        # povprecne_ocene is keyed by item_id, so the filter must test the
        # item column (tokens[1]); testing the user column kept the wrong
        # rows. Membership is checked on the dict itself (O(1)) instead of
        # on a freshly built key list.
        if tokens[1] in povprecne_ocene:
            f_out.write(line + "\n")

# Ascending by average rating: worst films first, best films last.
sortirane_povp_ocene = sorted(povprecne_ocene.items(), key=operator.itemgetter(1))
# Slices (rather than a fixed count of index lookups) stay safe when fewer
# than TOP_N entries exist.
print("10 najslabsih: ")
for film_id, _ in sortirane_povp_ocene[:TOP_N]:
    print(film_id)
print("10 najboljsih: ")
for film_id, _ in sortirane_povp_ocene[::-1][:TOP_N]:
    print(film_id)

print("Gledanost filmov: ")
# Ascending by count: smallest counts first, largest counts last.
sortirani_filmi_po_gledanosti = sorted(stevilo_ogledanih_filmov.items(), key=operator.itemgetter(1))
print("10 najmanj gledanih: ")
for entry_id, _ in sortirani_filmi_po_gledanosti[:TOP_N]:
    print(entry_id)
print("10 najbolj gledanih: ")
for entry_id, _ in sortirani_filmi_po_gledanosti[::-1][:TOP_N]:
    print(entry_id)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement