Advertisement
Guest User

Untitled

a guest
Jun 24th, 2019
181
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.11 KB | None | 0 0
  1. try:
  2. import unzip_requirements
  3. except ImportError:
  4. pass
  5.  
  6. import json
  7. import os
  8. import boto3
  9. import string
  10. import re
  11.  
  # boto3 client for the 'runtime.sagemaker' service, created once at import
  # time and shared by every call to the handler in this module.
  runtime = boto3.client('runtime.sagemaker')

  # Name of the endpoint to invoke, read from the environment. A missing
  # variable raises KeyError at import time.
  SAGEMAKER_ENDPOINT_NAME = os.environ['SAGEMAKER_ENDPOINT_NAME']
  14.  
  def generateTag(event, context):
      """Handle a request: generate tags for the sentence in the request body.

      Args:
          event: Mapping whose ``'body'`` entry is a JSON string containing a
              ``"sentence"`` field.
          context: Unused by this handler.

      Returns:
          A dict with ``'statusCode'`` and a JSON-encoded ``'body'``:

          * 200 and a JSON list of label strings on success.
          * 400 and ``{"error_message": ...}`` when the request body is
            missing, is not valid JSON, or has no ``"sentence"`` field.
          * 400 and ``{"error_message": "Unable to generate tag."}`` when
            invoking the endpoint or decoding its response fails.
      """
      # Validate the request up front so a malformed body yields a clean 400
      # response instead of an unhandled exception escaping the handler.
      try:
          data = json.loads(event['body'])
          raw_sentence = data["sentence"]
      except (KeyError, TypeError, ValueError) as e:
          print(e)
          return {'statusCode': 400,
                  'body': json.dumps({'error_message':
                                      'Request body must be JSON with a "sentence" field.'})}

      try:
          # The endpoint receives a one-element list holding the cleaned sentence.
          # NOTE(review): "k": 3 presumably asks for the top three predictions -
          # confirm against the deployed model's request schema.
          payload = {"instances": [clean_text(raw_sentence)],
                     "configuration": {"k": 3}}

          response = runtime.invoke_endpoint(EndpointName=SAGEMAKER_ENDPOINT_NAME,
                                             ContentType='application/json',
                                             Body=json.dumps(payload))

          result = json.loads(response['Body'].read().decode())
          # Drop the first nine characters of every returned label.
          # NOTE(review): presumably this removes a "__label__" prefix - confirm
          # against the model's actual output.
          labels = [label[9:] for label in result[0]['label']]
          return {'statusCode': 200, 'body': json.dumps(labels)}
      except Exception as e:
          # Top-level boundary of the handler: print the failure and return a
          # generic error body (status code kept at 400 for existing clients).
          print(e)
          return {'statusCode': 400,
                  'body': json.dumps({'error_message': 'Unable to generate tag.'})}
  38.  
  39.  
  # Patterns are compiled once at import time instead of on every call.
  # DOTALL lets code blocks and link bodies span several lines.
  _CODE_BLOCK_RE = re.compile(r'<pre><code>.*?</code></pre>', re.DOTALL)
  # Non-greedy body so each link is handled on its own; "\s" after "<a" keeps
  # the pattern from matching other tags that merely start with "a".
  _ANCHOR_RE = re.compile(r'<a\s[^>]*>(.*?)</a>', re.DOTALL)
  _TAG_RE = re.compile(r'<[^>]+>')
  _URL_SCHEME_RE = re.compile(r'[a-z]+://')
  _MENTION_OR_HASHTAG_RE = re.compile(r'[@#][A-Za-z0-9]+')
  _PUNCTUATION_RE = re.compile(r"[.,(){}\[\]`'!?:;\-=]")

  # NOTE(review): lookups are whole-token, so the suffix-style keys ("'ll",
  # "'have") only match when they appear as standalone tokens - confirm
  # whether suffix expansion was intended.
  _CONTRACTIONS = {"mayn't": "may not", "may've": "may have", "isn't": "is not",
                   "wasn't": "was not", "'ll": " will", "'have": "have"}


  def _link_text_unless_url(match):
      """Return a link's visible text, or '' when that text is itself a URL."""
      link_text = match.group(1)
      return '' if _URL_SCHEME_RE.match(link_text) else link_text


  def _expand_contractions(raw_text):
      """Replace whitespace-separated tokens found in _CONTRACTIONS."""
      # Normalise the typographic apostrophe so lookups use a plain "'".
      raw_text = raw_text.replace("’", "'")
      return " ".join(_CONTRACTIONS.get(word, word) for word in raw_text.split())


  def clean_text(text):
      """Normalise a piece of HTML-ish text into plain, space-separated words.

      Non-string input is returned unchanged. For strings the steps are:

      1. Remove ``<pre><code>...</code></pre>`` blocks entirely.
      2. Replace each ``<a ...>...</a>`` link with its visible text, or with
         nothing when that text is itself a URL.
      3. Strip all remaining tags.
      4. Expand the known contractions.
      5. Replace ``@mention`` and ``#hashtag`` tokens with a space.
      6. Replace punctuation with spaces and collapse whitespace.

      Code blocks and links are processed before generic tag stripping;
      stripping tags first would leave nothing for those patterns to match.
      """
      if not isinstance(text, str):
          return text

      text = _CODE_BLOCK_RE.sub('', text)
      text = _ANCHOR_RE.sub(_link_text_unless_url, text)
      text = _TAG_RE.sub('', text)
      # Must run before punctuation stripping, which removes the apostrophes
      # the contraction lookup relies on.
      text = _expand_contractions(text)
      text = ' '.join(_MENTION_OR_HASHTAG_RE.sub(' ', text).split())
      text = ' '.join(_PUNCTUATION_RE.sub(' ', text).split())
      return text
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement