Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Exercise 4, part 2: summarise word statistics for the first translation in
# `hamlets`, producing one summary row per frequency class.
language, text = hamlets.iloc[0]  # the first row unpacks into (language, text)

# `count_words_fast` is defined elsewhere; its result is used here as a
# mapping of word -> number of occurrences (via .keys() / .values()).
counted_text = count_words_fast(text)

# One row per distinct word, with its count and its character length.
data = pd.DataFrame({
    "word": list(counted_text.keys()),
    "count": list(counted_text.values()),
})
data["length"] = data["word"].apply(len)

# Classify every word by its count. Order matters: "unique" (count == 1) is
# a subset of "infrequent" (count <= 10), so it must be assigned last.
data.loc[data["count"] > 10, "frequency"] = "frequent"
data.loc[data["count"] <= 10, "frequency"] = "infrequent"
data.loc[data["count"] == 1, "frequency"] = "unique"

# Mean word length per frequency class; the resulting Series is indexed by
# the class labels that actually occur in `data`.
mean_word_length = data.groupby(by="frequency")["length"].mean()

# The summary has one row per frequency class, not one per word. Passing
# `language` as a scalar lets pandas broadcast it over the Series' index;
# a per-word list here would not match the index length and raise ValueError.
sub_data = pd.DataFrame({
    "language": language,
    "mean_word_length": mean_word_length,
})
# Take the labels from the grouped result itself so they always line up with
# the rows (the per-word `data["frequency"]` column has the wrong length).
sub_data["frequency"] = list(mean_word_length.index)
Add Comment
Please, Sign In to add comment