Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
import pandas as pd

# Example input: "Text" holds sentence fragments (one of them missing, as NaN)
# and "Selection_Values" carries a 1 on each row after which the fragments are
# split into a new sentence (rows 5 and 8 here).
# The frame is built in a single constructor call so both columns are checked
# for equal length together, instead of filling an empty frame column by column.
df = pd.DataFrame(
    {
        "Text": [
            "Hi", "this is", "just", "a", "single", "sentence.",
            "This", np.nan, "is another one.",
            "This is", "a", "third", "sentence", ".",
        ],
        "Selection_Values": [0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
    }
)
print(df)
- Text Selection_Values
- 0 Hi 0
- 1 this is 0
- 2 just 0
- 3 a 0
- 4 single 0
- 5 sentence. 1
- 6 This 0
- 7 NaN 0
- 8 is another one. 1
- 9 This is 0
- 10 a 0
- 11 third 0
- 12 sentence 0
- 13 . 0
- [["Hi this is just a single sentence."],["This is another one."], ["This is a third sentence ."]]
- [["Hi this is"], ["just a"], ["single sentence."],["This is another one"], ["This is"], ["a third sentence ."]]
# Give every row the number of end-of-sentence flags seen *before* it, so a
# flagged row stays in the sentence it terminates and the next row starts a
# new one. Grouping on this label replaces np.split on a Series and does not
# depend on the index labels being the positions 0..n-1.
sentence_id = df["Selection_Values"].eq(1).cumsum().shift(fill_value=0)

# Join each group's fragments with single spaces; str.cat leaves out missing
# (NaN) fragments. Groups are never empty, so no emptiness filter is needed.
sentences = [
    [fragments.str.cat(sep=" ")]
    for _, fragments in df["Text"].groupby(sentence_id)
]
print(sentences)
- [["Hi this is just a single sentence."],["This is another one."], ["This is a third sentence ."]]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement