# Untitled

# Bad data: the rows whose first column is "weird..." are the bad ones.
data = """
ts1,d001,d002,d003
ts2,d001,d002,d003
ts3,d001,d002,d003
weird1,d001,d002,d003
weird2,d001,d002,d003
ts4,d001,d002,d003
"""

# The good data: same layout, but without any "weird..." rows.
other = """
ts1,f001,f002,f003
ts2,f001,f002,f003
ts3,f001,f002,f003
ts4,f001,f002,f003
"""

# Create the demo files: each sample text goes into its own CSV file.
fn1 = "d1.csv"
fn2 = "d2.csv"
for path, content in ((fn1, data), (fn2, other)):
    with open(path, "w") as f:
        f.write(content)

import csv

def readFile(name):
    """Read a header-less CSV file with 4 columns into a list of dicts.

    The file is expected to have no header line: every row is mapped onto
    the fixed field names "ts", "dp1", "dp2" and "dp3". Completely empty
    lines are skipped by csv.DictReader.

    name: path of the CSV file to read.

    Returns a list with one dict per data row, in file order (an empty list
    for an empty file).
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation asks for; without it, line breaks embedded in quoted
    # fields are translated by the text layer before csv sees them.
    with open(name, "r", newline="") as f:
        reader = csv.DictReader(f, fieldnames=["ts", "dp1", "dp2", "dp3"])
        return list(reader)

# Parse both demo files into lists of row dicts, then show each of them.
badData, goodData = readFile(fn1), readFile(fn2)

for rows in (badData, goodData):
    print(rows)

# weird data (sample output of print(badData)):
# [{'dp3': 'd003', 'ts': 'ts1', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003', 'ts': 'ts2', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003', 'ts': 'ts3', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003', 'ts': 'weird1', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003', 'ts': 'weird2', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003 ', 'ts': 'ts4', 'dp1': 'd001', 'dp2': 'd002'}]

# good data (sample output of print(goodData)):
# [{'dp3': 'f003', 'ts': 'ts1', 'dp1': 'f001', 'dp2': 'f002'},
#  {'dp3': 'f003', 'ts': 'ts2', 'dp1': 'f001', 'dp2': 'f002'},
#  {'dp3': 'f003', 'ts': 'ts3', 'dp1': 'f001', 'dp2': 'f002'},
#  {'dp3': 'f003 ', 'ts': 'ts4', 'dp1': 'f001', 'dp2': 'f002'}]

# Collect every "ts" value that occurs in the good data.
goodTs = {entry["ts"] for entry in goodData}

# Clean the bad data: keep a row only if its "ts" is one of the good ones.
cleanedData = [row for row in badData if row["ts"] in goodTs]

print(cleanedData)

# filtered weird data (sample output of print(cleanedData)):
# [{'dp3': 'd003', 'ts': 'ts1', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003', 'ts': 'ts2', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003', 'ts': 'ts3', 'dp1': 'd001', 'dp2': 'd002'},
#  {'dp3': 'd003 ', 'ts': 'ts4', 'dp1': 'd001', 'dp2': 'd002'}]