Guest User

Untitled

a guest
May 26th, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.88 KB | None | 0 0
  1. # bad data, the weird ones are bad
  2.  
  3. data = """
  4. ts1,d001,d002,d003
  5. ts2,d001,d002,d003
  6. ts3,d001,d002,d003
  7. weird1,d001,d002,d003
  8. weird2,d001,d002,d003
  9. ts4,d001,d002,d003
  10. """
  11.  
  12. # the good data
  13.  
  14. other = """
  15. ts1,f001,f002,f003
  16. ts2,f001,f002,f003
  17. ts3,f001,f002,f003
  18. ts4,f001,f002,f003
  19. """
  20.  
  21. # create demo files
  22. fn1 = "d1.csv"
  23. fn2 = "d2.csv"
  24. with open(fn1,"w") as f:
  25. f.write(data)
  26. with open(fn2,"w") as f:
  27. f.write(other)
  28.  
  29. import csv
  30.  
  31. def readFile(name):
  32. """returns a dict for data with 4 colums"""
  33. result = []
  34. with open(name,"r") as f:
  35. k = csv.DictReader(f,fieldnames=["ts","dp1","dp2","dp3"])
  36. for l in k:
  37. result.append(l)
  38. return result
  39.  
  40. badData = readFile(fn1)
  41. goodData = readFile(fn2)
  42.  
  43. print(badData)
  44. print(goodData)
  45.  
  46. # weird data
  47. [{'dp3': 'd003', 'ts': 'ts1', 'dp1': 'd001', 'dp2': 'd002'},
  48. {'dp3': 'd003', 'ts': 'ts2', 'dp1': 'd001', 'dp2': 'd002'},
  49. {'dp3': 'd003', 'ts': 'ts3', 'dp1': 'd001', 'dp2': 'd002'},
  50. {'dp3': 'd003', 'ts': 'weird1', 'dp1': 'd001', 'dp2': 'd002'},
  51. {'dp3': 'd003', 'ts': 'weird2', 'dp1': 'd001', 'dp2': 'd002'},
  52. {'dp3': 'd003 ', 'ts': 'ts4', 'dp1': 'd001', 'dp2': 'd002'}]
  53.  
  54. # good data
  55. [{'dp3': 'f003', 'ts': 'ts1', 'dp1': 'f001', 'dp2': 'f002'},
  56. {'dp3': 'f003', 'ts': 'ts2', 'dp1': 'f001', 'dp2': 'f002'},
  57. {'dp3': 'f003', 'ts': 'ts3', 'dp1': 'f001', 'dp2': 'f002'},
  58. {'dp3': 'f003 ', 'ts': 'ts4', 'dp1': 'f001', 'dp2': 'f002'}]
  59.  
  60. # get all the "good" ts
  61. goodTs = set( oneDict["ts"] for oneDict in goodData)
  62.  
  63. # clean the bad data, only keep those "ts" that are in goodTs
  64. cleanedData = [x for x in badData if x["ts"] in goodTs]
  65.  
  66. print(cleanedData)
  67.  
  68. # filtered weird data
  69. [{'dp3': 'd003', 'ts': 'ts1', 'dp1': 'd001', 'dp2': 'd002'},
  70. {'dp3': 'd003', 'ts': 'ts2', 'dp1': 'd001', 'dp2': 'd002'},
  71. {'dp3': 'd003', 'ts': 'ts3', 'dp1': 'd001', 'dp2': 'd002'},
  72. {'dp3': 'd003 ', 'ts': 'ts4', 'dp1': 'd001', 'dp2': 'd002'}]
Add Comment
Please, Sign In to add comment