Advertisement
Guest User

Untitled

a guest
Mar 4th, 2019
148
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.42 KB | None | 0 0
  1. import csv
  2. import sys
  3. import re
  4. import psycopg2
  5.  
  6. conn = psycopg2.connect(host="db-postgresql-nyc1-82368-do-user-4775635-0.db.ondigitalocean.com", port="25060",
  7. database="big_bang", user="doadmin", password="jujyszdagjsh6rb6")
  8. conn.autocommit = True
  9. cur = conn.cursor()
  10.  
  11. seen = []
  12.  
  13.  
  14. def strip_common(s):
  15. common = [
  16. "AND", "ASSN", "ASSOC", "PLC", "INC", "PLLC", "ASSOCIATION", "INCORPORATED", "THE", "OF", "CO", "LIMITED",
  17. "SERVICE", "COMP", "LLC", "SERVICES",
  18. "COMPANY", "LLP", "STORE", "CORP", "LP", "SVCS", "CORPORATION", "LTD"]
  19. words = s.split()
  20.  
  21. res_words = [word for word in words if word not in common]
  22. res = ' '.join(res_words)
  23. return res if len(res) >= 4 else s
  24.  
  25.  
  26. def rem_vowel(s):
  27. vowels = ('A', 'E', 'I', 'O', 'U')
  28. for x in s:
  29. if x in vowels:
  30. s = s.replace(x, "")
  31. return s
  32.  
  33.  
  34. def address_id(record):
  35. s = ""
  36. record["STATE"] = record["STATE"].upper()
  37. record["STREET"] = record["STREET"].upper()
  38. if record["STATE"] and len(record["STATE"]) == 2:
  39. s += record["STATE"]
  40. else:
  41. s += "NN"
  42. street = re.sub('[^a-zA-Z]+', '', record["STREET"])
  43. if len(street) > 0:
  44. s += street[0]
  45. s += street[int(len(street)/2)]
  46. else:
  47. s += "NN"
  48. return s
  49.  
  50.  
  51. def company_id(record):
  52. s = ""
  53. fingerprint = strip_common(re.sub("[^A-Z\s]", "", record["COMPANY"].strip().upper())).replace(" ", "")
  54. if len(fingerprint) > 0:
  55. s += fingerprint[:2]
  56. s += fingerprint[int(len(fingerprint)/2)]
  57. s += fingerprint[-2:]
  58. return s.ljust(5, "*")
  59.  
  60.  
  61. def branch_hq_id(record):
  62. sql = "SELECT account_link_id FROM xplorer.crosswalk WHERE infogroup_id = %s;"
  63. cur.execute(sql, (record["DAX_HQ_ID"],))
  64. hq_id = cur.fetchone()
  65. return hq_id[0][:4] if hq_id else "0000"
  66.  
  67.  
  68. def generate_id(record):
  69. acct_link_id = ""
  70. if record["PLACE_TYPE"] == "headquarters":
  71. acct_link_id += company_id(record)
  72. acct_link_id += "00000"
  73. elif record["PLACE_TYPE"] == "branch":
  74. acct_link_id += branch_hq_id(record)
  75. acct_link_id += company_id(record)
  76. else:
  77. acct_link_id += "00000"
  78. acct_link_id += company_id(record)
  79. acct_link_id += address_id(record)
  80. return acct_link_id
  81.  
  82.  
  83. def save_place(record, acct_link_id):
  84. values = (
  85. acct_link_id, record["DOMAIN"], record["COMPANY"], record["STREET"], record["SUITE"], record["CITY"], record["STATE"],
  86. int(record["ZIP"]) if record["ZIP"] else None,
  87. int(record["ZIP_4"]) if record["ZIP_4"] else None,)
  88. sql = "INSERT INTO xplorer.places " \
  89. "(account_link_id, domain, name, street, suite, city, state, zip, zip_4, created_at, updated_at) " \
  90. "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW(), NOW());"
  91. try:
  92. cur.execute(sql, values)
  93. except Exception as e:
  94. print(cur.query)
  95. print(e)
  96. print("\n")
  97.  
  98.  
  99. with open(sys.argv[1]) as fd_in, open(sys.argv[2], "w") as fd_out:
  100. csv_in = csv.DictReader(fd_in)
  101. csv_out = csv.writer(fd_out)
  102. csv_out.writerow(["ACCOUNT_LINK_ID", "INFOGROUP_ID"])
  103. for row in csv_in:
  104. if row["PLACE_TYPE"] == "branch":
  105. continue
  106. if row["tab1_DAX_COMP_ID"] not in seen:
  107. seen.append(row["tab1_DAX_COMP_ID"])
  108. account_link_id = generate_id(row)
  109. save_place(row, account_link_id)
  110. csv_out.writerow([account_link_id, row["tab1_DAX_COMP_ID"]])
  111. conn.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement