Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def validate_format(number):
    """Return True when *number* matches the pattern chosen by its length.

    The length of the candidate selects which entry of ``formats`` is tried
    (``formats`` is not defined in this block and must be in scope):
    14 -> ``formats[0]``, 12 -> ``formats[1]``, 11 -> ``formats[2]``,
    10 -> ``formats[3]``.  Any other length is rejected without running a
    regular expression.

    Args:
        number: Candidate string.  ``None`` is treated as invalid instead of
            raising ``TypeError`` from ``len()``, so nullable values can be
            passed straight through.

    Returns:
        bool: True if ``re.match`` succeeds with the selected pattern,
        otherwise False.
    """
    if number is None:
        return False

    # Candidate length -> index of the pattern to try in ``formats``.
    pattern_index_by_length = {14: 0, 12: 1, 11: 2, 10: 3}
    pattern_index = pattern_index_by_length.get(len(number))
    if pattern_index is None:
        return False

    # re.match anchors at the start of the string only, as before.
    return re.match(formats[pattern_index], number) is not None
def create_dataframe(spark):
    """Build the sample DataFrame and keep the columns holding valid numbers.

    Three sample rows with four columns are created, a null-safe wrapper
    around ``validate_format`` is registered as the Spark SQL function
    ``validateFormat``, and the frame is narrowed to the columns in which at
    least one cell passes that check.

    NOTE(review): the previous comprehension called ``validateFormat`` as a
    plain Python name on each column *name*.  ``spark.udf.register`` does not
    create such a name, and filtering on column values is the presumed
    intent (only ``Column1`` of the sample data holds phone-like strings)
    -- confirm.

    Args:
        spark: Session object used to create the frame and register the UDF.

    Returns:
        The DataFrame restricted to the matching columns.  Nothing was
        returned before, so existing callers are unaffected.
    """
    my_cols = Row("Column1", "Column2", "Column3", "Column4")
    sample_rows = [
        my_cols('(617)-283-3811', 'Salah', 'Messi', None),
        my_cols('617-2833811', 'Messi', 'Virgil', 'Messi'),
        my_cols('617-283-3811', 'Ronaldo', 'Messi', 'Ronaldo'),
    ]
    df = spark.createDataFrame(sample_rows)

    def _is_valid_cell(value):
        # Null cells reach a Python UDF as None (and a column may not be a
        # string column at all); treat both as "not a valid number".
        return isinstance(value, str) and validate_format(value)

    # register() hands back the UDF object (Spark 2.3+); that object, not the
    # SQL name "validateFormat", is what can be applied to a Column in Python.
    is_valid = spark.udf.register("validateFormat", _is_valid_cell, BooleanType())

    # One small job per column: keep it if any row passes validation.
    matching_columns = [
        name
        for name in df.columns
        if df.filter(is_valid(df[name])).head(1)
    ]
    return df.select(matching_columns)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement