Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Demo input: the rows whose first field starts with "weird" are the bad ones.
data = """
ts1,d001,d002,d003
ts2,d001,d002,d003
ts3,d001,d002,d003
weird1,d001,d002,d003
weird2,d001,d002,d003
ts4,d001,d002,d003
"""

# Reference input that contains only good rows.
other = """
ts1,f001,f002,f003
ts2,f001,f002,f003
ts3,f001,f002,f003
ts4,f001,f002,f003
"""

# Write both samples to disk so they can be read back as CSV demo files.
fn1, fn2 = "d1.csv", "d2.csv"
for _path, _text in ((fn1, data), (fn2, other)):
    with open(_path, "w") as f:
        f.write(_text)
- import csv
def readFile(name):
    """Read a headerless four-column CSV file into a list of row dicts.

    Args:
        name: Path of the CSV file to read.

    Returns:
        A list holding one dict per row yielded by ``csv.DictReader``,
        keyed by "ts", "dp1", "dp2" and "dp3" in column order.
    """
    # newline="" hands line-ending handling to the csv module, so quoted
    # fields that contain line breaks come back exactly as they were written.
    with open(name, "r", newline="") as f:
        reader = csv.DictReader(f, fieldnames=["ts", "dp1", "dp2", "dp3"])
        return list(reader)
# Parse both demo files (bad one first), then print each parsed row list.
badData, goodData = readFile(fn1), readFile(fn2)
for _rows in (badData, goodData):
    print(_rows)
- # weird data
- [{'dp3': 'd003', 'ts': 'ts1', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003', 'ts': 'ts2', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003', 'ts': 'ts3', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003', 'ts': 'weird1', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003', 'ts': 'weird2', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003 ', 'ts': 'ts4', 'dp1': 'd001', 'dp2': 'd002'}]
- # good data
- [{'dp3': 'f003', 'ts': 'ts1', 'dp1': 'f001', 'dp2': 'f002'},
- {'dp3': 'f003', 'ts': 'ts2', 'dp1': 'f001', 'dp2': 'f002'},
- {'dp3': 'f003', 'ts': 'ts3', 'dp1': 'f001', 'dp2': 'f002'},
- {'dp3': 'f003 ', 'ts': 'ts4', 'dp1': 'f001', 'dp2': 'f002'}]
- # get all the "good" ts
- goodTs = set( oneDict["ts"] for oneDict in goodData)
- # clean the bad data, only keep those "ts" that are in goodTs
- cleanedData = [x for x in badData if x["ts"] in goodTs]
- print(cleanedData)
- # filtered weird data
- [{'dp3': 'd003', 'ts': 'ts1', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003', 'ts': 'ts2', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003', 'ts': 'ts3', 'dp1': 'd001', 'dp2': 'd002'},
- {'dp3': 'd003 ', 'ts': 'ts4', 'dp1': 'd001', 'dp2': 'd002'}]
Add Comment
Please, Sign In to add comment