Advertisement
Guest User

Untitled

a guest
Jun 15th, 2019
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.49 KB | None | 0 0
  1. ID STR
  2. 280 11040402-38.58551%;11050101-9.29086%;11070101-52.12363%
  3. 351 11130203-35%;11130230-65%
  4. 510 11070103-69%
  5. 655 11090103-41.63463%;11160102-58.36537%
  6. 666 11130205-50.00%;11130207-50%
  7.  
  8. df['STR'].apply(lambda x: y.split('-') for y in x.split(';'))
  9.  
  10. ID STR.LEFT STR.RIGHT
  11. 280 [11040402, 11050101, 11070101] [38.58551%, 9.29086%, 52.12363%]
  12. 351 [11130203, 11130230] [35%, 65%]
  13. 510 [11070103] [69%]
  14. 655 [11090103, 11160102] [41.63463%, 58.36537%]
  15. 666 [11130205, 11130207] [50.00%, 50%]
  16.  
  17. # set ID as index so it is carried through the split/stack steps and can be grouped on by name
  18. df.set_index('ID', inplace=True)
  19.  
  20.  
  21. new_series = df.STR.str.split(';', expand=True).stack().reset_index(level=-1,drop=True)  # one 'left-right' string per row; the extra level added by stack() is dropped, leaving ID as the index
  22.  
  23. new_df = new_series.str.split('-', expand=True)  # column 0 = text before '-', column 1 = text after
  24.  
  25. new_df.groupby('ID').agg(list).reset_index()  # gather each ID's rows back into lists; ID becomes a regular column again
  26.  
  27. ID 0 1
  28. -- ---- ------------------------------------ --------------------------------------
  29. 0 280 ['11040402', '11050101', '11070101'] ['38.58551%', '9.29086%', '52.12363%']
  30. 1 351 ['11130203', '11130230'] ['35%', '65%']
  31. 2 510 ['11070103'] ['69%']
  32. 3 655 ['11090103', '11160102'] ['41.63463%', '58.36537%']
  33. 4 666 ['11130205', '11130207'] ['50.00%', '50%']
  34.  
  35. (df['STR'].str.extractall(r'(.*?)-(.*?)(?=;|$)')
  36. .groupby(level=0)
  37. .agg(list)
  38. .set_axis(['STR.LEFT', 'STR.RIGHT'], axis=1, inplace=False))
  39.  
  40. STR.LEFT STR.RIGHT
  41. 0 [11040402, ;11050101, ;11070101] [38.58551%, 9.29086%, 52.12363%]
  42. 1 [11130203, ;11130230] [35%, 65%]
  43. 2 [11070103] [69%]
  44. 3 [11090103, ;11160102] [41.63463%, 58.36537%]
  45. 4 [11130205, ;11130207] [50.00%, 50%]
  46.  
  47. (df['STR'].str.extractall(r'(.*?)-(.*?)(?=;|$)')
  48. .groupby(level=0)
  49. .agg(list)
  50. .set_axis(['STR.LEFT', 'STR.RIGHT'], axis=1, inplace=False)
  51. .join(df['ID'])
  52.  
  53. STR.LEFT STR.RIGHT ID
  54. 0 [11040402, ;11050101, ;11070101] [38.58551%, 9.29086%, 52.12363%] 280
  55. 1 [11130203, ;11130230] [35%, 65%] 351
  56. 2 [11070103] [69%] 510
  57. 3 [11090103, ;11160102] [41.63463%, 58.36537%] 655
  58. 4 [11130205, ;11130207] [50.00%, 50%] 666
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement