Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// spark-shell script: for every host, report what percentage of the summed
// time belongs to one user. `sc` is the SparkContext supplied by the shell.

// Load the three input log files from HDFS.
val txt1 = sc.textFile("hdfs://grid223-20:9000/input/taglogsbig/huge10g.log")
val txt2 = sc.textFile("hdfs://grid223-20:9000/input/taglogsbig/huge10g2.log")
val txt3 = sc.textFile("hdfs://grid223-20:9000/input/taglogsbig/huge10g3.log")

// Concatenate the files into a single RDD of raw log lines.
val txt = txt1.union(txt2).union(txt3)

// User whose share of the per-host time is reported.
val targetUser = "user39"

// Scan the input ONCE. Every line is split a single time and, per host, we
// accumulate a triple:
//   _1 = time summed over all users
//   _2 = time summed over targetUser only
//   _3 = true if targetUser occurred on that host at all
// Line layout as read here: tab-separated, host in field 0, user in field 1,
// time in the last field. A line with fewer than two fields or a non-numeric
// last field fails the job, exactly as it did before.
val timePerHost = txt.map { line =>
  val fields = line.split("\t")
  val host = fields(0)
  val isTarget = fields(1) == targetUser
  val time = fields.last.toDouble
  (host, (time, if (isTarget) time else 0.0, isTarget))
}.reduceByKey { (a, b) => (a._1 + b._1, a._2 + b._2, a._3 || b._3) }

// Time per host for the target user, restricted to hosts where that user
// occurs. (The val name is kept from the original script; the user actually
// selected is targetUser.)
val user13TimePerHost = timePerHost.filter(_._2._3).mapValues(_._2)

// Time per host summed over all users.
val allUsersTimePerHost = timePerHost.mapValues(_._1)

// (host, (allUsersTime, targetUserTime)) for hosts where the target user
// occurs -- the same rows an inner join of the two RDDs above would give,
// without a second scan of the input or an extra shuffle.
val totalData = timePerHost.filter(_._2._3).mapValues(v => (v._1, v._2))

// Percentage of each host's total time that belongs to the target user,
// brought back to the driver as an array of (host, percent).
val finalResult = totalData.map(x => (x._1, x._2._2 * 100 / x._2._1)).collect()

// Terminate the shell once the result has been computed.
System.exit(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement