SHARE
TWEET

Untitled

a guest Jun 24th, 2019 62 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. try:
  2.   import unzip_requirements
  3. except ImportError:
  4.   pass
  5.  
  6. import json
  7. import os
  8. import boto3
  9. import string
  10. import re
  11.  
  # SageMaker runtime client, created once at import time and shared by every
  # call to the handler in this module.
  runtime = boto3.client(service_name="runtime.sagemaker")

  # Name of the endpoint to invoke; a missing environment variable raises
  # KeyError while the module is being imported.
  SAGEMAKER_ENDPOINT_NAME = os.environ["SAGEMAKER_ENDPOINT_NAME"]
  14.  
  def generateTag(event, context):
      """Lambda handler: predict tags for the sentence in the request body.

      Args:
          event: Invocation event. ``event['body']`` must be a JSON string
              encoding an object with a ``"sentence"`` key.
          context: Lambda context object (unused).

      Returns:
          dict: ``{'statusCode': 200, 'body': <JSON list of labels>}`` on
          success. Any failure (malformed request, endpoint error, unexpected
          response shape) yields ``{'statusCode': 400, 'body': <JSON error>}``.
      """
      failure = {
          'statusCode': 400,
          'body': json.dumps({'error_message': 'Unable to generate tag.'}),
      }

      # Validate the request before doing any remote work, so a missing or
      # malformed body produces the normal error payload instead of an
      # unhandled exception escaping the handler.
      try:
          raw_sentence = json.loads(event['body'])["sentence"]
      except (KeyError, TypeError, ValueError) as e:
          print(e)
          return failure

      # NOTE(review): the original sliced off the first 9 characters of every
      # label, which is the length of "__label__" - presumably the prefix the
      # model puts on its labels; confirm against the deployed model. Labels
      # without that prefix are now returned untouched instead of truncated.
      label_prefix = "__label__"

      try:
          # NOTE(review): "k": 3 presumably asks the endpoint for the top three
          # predictions - confirm against the model's inference format.
          payload = {"instances": [clean_text(raw_sentence)],
                     "configuration": {"k": 3}}
          response = runtime.invoke_endpoint(
              EndpointName=SAGEMAKER_ENDPOINT_NAME,
              ContentType='application/json',
              Body=json.dumps(payload))
          result = json.loads(response['Body'].read().decode())
          # Only the first prediction is used: exactly one instance was sent.
          labels = [
              label[len(label_prefix):] if label.startswith(label_prefix) else label
              for label in result[0]['label']
          ]
      except Exception as e:
          # Top-level boundary: report the cause in the logs and answer with
          # the generic error payload rather than crashing the invocation.
          print(e)
          return failure
      return {'statusCode': 200, 'body': json.dumps(labels)}
  38.  
  39.  
  def clean_text(text):
      """Normalize raw (possibly HTML) text into a plain, space-separated string.

      Steps, in order:
        1. Remove ``<pre><code>...</code></pre>`` blocks together with their
           contents.
        2. Replace each ``<a ...>text</a>`` with its text, or with nothing when
           the text is itself a URL (``scheme://...``).
        3. Strip every remaining HTML tag.
        4. Replace ``@mentions`` and ``#hashtags`` with a space.
        5. Replace the punctuation characters ``. , ( ) { } [ ] ` ' ! ? : ; - =``
           with a space.
        6. Collapse runs of whitespace and trim both ends.

      Args:
          text: Value to clean. Anything that is not a ``str`` is returned
              unchanged.

      Returns:
          The cleaned string, or ``text`` itself when it is not a ``str``.
      """
      if not isinstance(text, str):
          return text

      def replace_link(match):
          # Keep ordinary link text; drop anchors whose visible text is a URL.
          link_text = match.group(1)
          return '' if re.match(r'[a-z]+://', link_text) else link_text

      # Code blocks and anchors must be handled while the tags still exist. The
      # original stripped all tags first, so these two substitutions could
      # never match. DOTALL lets a block or a link span several lines.
      text = re.sub(r'<pre><code>.*?</code></pre>', '', text, flags=re.DOTALL)
      # Non-greedy so each anchor is handled on its own; \b keeps the pattern
      # from matching other tags that merely start with "a" (e.g. <abbr>).
      text = re.sub(r'<a\b[^>]*>(.*?)</a>', replace_link, text, flags=re.DOTALL)
      text = re.sub(r'<[^>]+>', '', text)

      # NOTE(review): the original also defined a contraction-expansion helper
      # that was never called; it is intentionally not applied here so that
      # plain-text input is cleaned exactly as before.
      text = re.sub(r'(@[A-Za-z0-9]+)|(#[A-Za-z0-9]+)', ' ', text)
      text = re.sub(r"[.,(){}\[\]`'!?:;\-=]", ' ', text)
      return ' '.join(text.split())
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top