Guest User

Untitled

a guest
Feb 21st, 2018
144
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.89 KB | None | 0 0
  1. ### exercise 4 2te teil
  2. language, text = hamlets.iloc[0]
  3.  
  4. counted_text = count_words_fast(text)
  5.  
  6. data = pd.DataFrame({
  7. "word": list(counted_text.keys()),
  8. "count": list(counted_text.values())
  9. })
  10.  
  11. data["length"] = data["word"].apply(len)
  12.  
  13. data.loc[data["count"] > 10, "frequency"] = "frequent"
  14. data.loc[data["count"] <= 10, "frequency"] = "infrequent"
  15. data.loc[data["count"] == 1, "frequency"] = "unique"
  16.  
  17. languages = []
  18. for i in range(len(list(counted_text.keys()))):
  19. languages.append(language)
  20.  
  21. sub_data = pd.DataFrame({
  22. "language": languages,
  23. "mean_word_length": data.groupby(by = "frequency")["length"].mean(),
  24. })
  25.  
  26. sub_data["frequency"]=list(data["frequency"])
  27.  
  28. #sub_data.loc[data["frequency"]=="frequent", "mean_word_length"] = mean1
  29. #sub_data.loc[data["frequency"]=="infrequent", "mean_word_length"] = mean2
  30. #sub_data.loc[data["frequency"]=="unique", "mean_word_length"] = mean3
Add Comment
Please, Sign In to add comment