Advertisement
avisrivastava254084

Untitled

Sep 30th, 2019
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.01 KB | None | 0 0
  1. def validate_format(number):
  2.     length = len(number)
  3.     if length == 14:
  4.         if (re.match(formats[0], number)):
  5.             return True
  6.         return False
  7.     if length == 12:
  8.         if (re.match(formats[1], number)):
  9.             return True
  10.         return False
  11.     if length == 11:
  12.         if (re.match(formats[2], number)):
  13.             return True
  14.         return False
  15.     if length == 10:
  16.         if (re.match(formats[3], number)):
  17.             return True
  18.         return False
  19.     return False
  20.  
  21.  
  22. def create_dataframe(spark):
  23.     my_cols = Row("Column1", "Column2", "Column3", "Column4")
  24.     row_1 = my_cols('(617)-283-3811', 'Salah', 'Messi', None)
  25.     row_2 = my_cols('617-2833811', 'Messi', 'Virgil', 'Messi')
  26.     row_3 = my_cols('617-283-3811', 'Ronaldo', 'Messi', 'Ronaldo')
  27.     row_seq = [row_1, row_2, row_3]
  28.     df = spark.createDataFrame(row_seq)
  29.     spark.udf.register("validateFormat", validate_format, BooleanType())
  30.     df = df.select([c for c in df.columns if validateFormat(c)])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement