Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- text =[list(['n[PROV', 'REPORT]nPerson', 'Name:', 'n','John', 'DearnProgram', 'Date:', '1/11/2000', '10:42', 'AMnMR']),  # sample data: one list of string tokens per row
- list(['nToday', 'Name:', 'n','James', 'JaynProgram', 'Date:', '3/11/2000', '1:45', 'PMnmissing']),
- list(['n[NEWS', 'REPORT]nPerson', 'Name:', 'n','Jane', 'DoenProgram', 'Date:', '3/11/2000', '1:45', 'PMnMR']),
- list(['n[PROV', 'REPORT]nPerson', 'Name:', 'n','Amy', 'ArmynProgram', 'Date:', '10/1/2000', '11:45', 'AMnMR'])]  # NOTE(review): the bare 'n' inside tokens looks like a newline escape whose backslash was lost in the paste - confirm against the original data
- df = pd.DataFrame({'Text' : text,  # each 'Text' cell holds a Python list, not a string
- 'ID': [1,2,3,4],
- 'P_ID': ['A','B','C','D'],
- })
- df
- ID P_ID Text
- 0 1 A [n[PROV, REPORT]nPerson, Name:, n, John, DearnProgram, Date:, 1/11/2000, 10:42, AMnMR]
- 1 2 B [nToday, Name:, n, James, JaynProgram, Date:, 3/11/2000, 1:45, PMnmissing]
- 2 3 C [n[NEWS, REPORT]nPerson, Name:, n, Jane, DoenProgram, Date:, 3/11/2000, 1:45, PMnMR]
- 3 4 D [n[PROV, REPORT]nPerson, Name:, n, Amy, ArmynProgram, Date:, 10/1/2000, 11:45, AMnMR]
- df['Text'].values
- array([ list(['n[PROV', 'REPORT]nPerson', 'Name:', 'n', 'John', 'DearnProgram', 'Date:', '1/11/2000', '10:42', 'AMnMR']),
- list(['nToday', 'Name:', 'n', 'James', 'JaynProgram', 'Date:', '3/11/2000', '1:45', 'PMnmissing']),
- list(['n[NEWS', 'REPORT]nPerson', 'Name:', 'n', 'Jane', 'DoenProgram', 'Date:', '3/11/2000', '1:45', 'PMnMR']),
- list(['n[PROV', 'REPORT]nPerson', 'Name:', 'n', 'Amy', 'ArmynProgram', 'Date:', '10/1/2000', '11:45', 'AMnMR'])], dtype=object)
- df['New_Text'] = df['Text'].replace(r'n', ' n ', regex=True)  # NOTE(review): 'Text' cells are lists; a regex replace presumably rewrites only string cells, so this likely leaves every row unchanged - verify. The split token lists shown after this line would need a per-token split applied to each list instead.
- df['New_Text'].values
- array([ list(['n', '[PROV', 'REPORT]', 'n' ,'Person', 'Name:', 'n', 'John', 'Dear', 'n', 'Program', 'Date:', '1/11/2000', '10:42', 'AM', 'n', 'MR']),
- list(['n', 'Today', 'Name:', 'n', 'James', 'Jay', 'n', 'Program', 'Date:', '3/11/2000', '1:45', 'PM','n', 'missing']),
- list(['n', '[NEWS', 'REPORT]','n', 'Person', 'Name:', 'n', 'Jane', 'Doe', 'n', 'Program', 'Date:', '3/11/2000', '1:45', 'PM', 'n', 'MR']),
- list(['n', '[PROV', 'REPORT]', 'n', 'Person', 'Name:', 'n', 'Amy', 'Army', 'n', 'Program', 'Date:', '10/1/2000', '11:45', 'AM', 'n', 'MR'])], dtype=object)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement