Advertisement
Guest User

Untitled

a guest
Jul 24th, 2019
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.44 KB | None | 0 0
  # Sample postal address used as the script's input: one component per line,
  # with a leading and a trailing newline.
  address = "\n".join([
      "",
      "CHRIS NISWANDEE",
      "SMALLSYS INC",
      "795 E DRAGRAM ST",
      "TUCSON AZ 85705",
      "USA",
      "",
  ])
  8.  
  9. import spacy
  10. from spacy.lang.en import English
  11.  
  # Load the pipeline once at import time; get_named_entities() calls it for
  # every address line.
  nlp = spacy.load("en_core_web_lg")
  13.  
  # Template of the address fields, each defaulting to an empty string.
  # It is not referenced elsewhere in this file.  The 'organisation' key now
  # uses the same spelling as the label in conditions() (was 'orgranisation').
  STRUCTURE = {
      'country': '',
      'state': '',
      'city': '',
      'pincode': '',
      'street': '',
      'organisation': '',
      'person': '',
  }

  # Parsed address components; filled in by conditions().
  results = {}
  25.  
  def conditions(index, names, entities, line):
      """Record the address components found on one address line in ``results``.

      ``index`` is the line's position counted from the END of the address:
      0 country, 1 state/city/postcode, 2 street, 3 organisation, 4 person.
      Positions outside that range are ignored instead of raising KeyError.

      Args:
          index: Position of the line, counted from the last address line.
          names: Entity texts found on the line (parallel to ``entities``).
          entities: Entity labels found on the line; only 'CARDINAL' and
              'GPE' are consulted.
          line: Raw text of the address line.

      Returns:
          None.  Findings are written into the module-level ``results`` dict.
      """
      checks = {0: 'country', 1: 'state-city', 2: 'street', 3: 'organisation', 4: 'person'}
      kind = checks.get(index)
      if kind is None:
          # More than five address lines: nothing is known about this position.
          return

      # First entity text seen for each label (same pick as entities.index()).
      found = {}
      for name, label in zip(names, entities):
          found.setdefault(label, name)

      # Split on any whitespace so repeated spaces do not yield empty tokens.
      words = line.split()

      if kind == 'country':
          if 'CARDINAL' in found:
              results['pincode'] = found['CARDINAL']
          if 'GPE' in found:
              results['country'] = found['GPE']
      elif kind == 'state-city':
          if 'CARDINAL' in found:
              results['pincode'] = found['CARDINAL']
          state = None
          city_words = []
          # Classify each token exactly once so the state code or the postcode
          # can no longer overwrite the city.
          for word in words:
              if word.isnumeric():
                  results['pincode'] = word
              elif len(word) == 2 and word.isalpha():
                  state = word
              else:
                  city_words.append(word)
          if state is not None:
              results['state'] = state
          if city_words:
              results['city'] = ' '.join(city_words)
          elif 'GPE' in found and found['GPE'] != state:
              # No plain token left over: fall back to the tagged place name.
              results['city'] = found['GPE']
      elif kind == 'street':
          if 'CARDINAL' in found:
              results['unit-number'] = found['CARDINAL']
          # The street name is the word directly before a street suffix.
          for before, suffix in zip(words, words[1:]):
              if suffix.lower() in ('street', 'st', 'strt'):
                  results['street'] = before + ' ' + suffix
      else:
          # 'organisation' and 'person' lines are stored whole.
          results[kind] = line.strip()
  58.  
  59.  
  60.  
  def get_named_entities(address):
      """Run the module-level ``nlp`` pipeline on ``address``.

      Returns:
          A ``(names, entities)`` tuple of two parallel lists: the text of
          each entity in ``doc.ents`` and the label of that same entity.
      """
      spans = list(nlp(address).ents)
      texts = [span.text for span in spans]
      labels = [span.label_ for span in spans]
      return (texts, labels)
  69.  
  def parseAddress(address):
      """Split a raw address into its non-empty, whitespace-trimmed parts.

      The text is broken on newlines and then on commas.  Each fragment is
      stripped, and fragments that are empty after stripping are dropped, so
      a blank or whitespace-only line cannot shift the positions of the
      remaining lines.

      Args:
          address: The address as a single string.

      Returns:
          A list of the trimmed fragments in their original order.
      """
      parts = []
      for row in address.split('\n'):
          for piece in row.split(','):
              piece = piece.strip()
              if piece:
                  parts.append(piece)
      return parts
  79.  
  def some(address):
      """Parse ``address`` and classify its lines from the last to the first.

      Each line is passed through get_named_entities(), printed together
      with its entities, and handed to conditions() with its position
      counted from the end of the address (0 for the last line).
      """
      for position, text in enumerate(reversed(parseAddress(address))):
          names, entities = get_named_entities(text)
          print(names, entities, text)
          conditions(position, names, entities, text)
  86.  
  87.  
  88.  
  89.  
  # Script entry: classify the sample address, then show what was collected.
  some(address)
  print(results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement