Advertisement
Guest User

Untitled

a guest
Aug 18th, 2019
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.94 KB | None | 0 0
  1. sklearn_model = SKLearnModel(model_data="s3://bucket/model.tar.gz", role="SageMakerRole", entry_point="predict.py")
  2.  
import os
import pickle
import re

import nltk
import numpy as np
import pandas as pd
import sagemaker_containers
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem.porter import *
from sklearn.feature_extraction.text import CountVectorizer

# Fetch the stopword corpus at import time so it is available before the
# first request is processed.
nltk.download("stopwords")
  16.  
  17.  
  18.  
  19. def model_fn(model_dir):
  20.  
  21. #TODO How to load the word_dict.
  22. #TODO How to load the model.
  23. return model, word_dict
  24.  
  25. def predict_fn(input_data, model):
  26. print('Inferring sentiment of input data.')
  27. trained_model, word_dict = model
  28. if word_dict is None:
  29. raise Exception('Model has not been loaded properly, no word_dict.')
  30.  
  31. #Process input_data so that it is ready to be sent to our model.
  32.  
  33. input_bow_csv = process_input_text(word_dict, input_data)
  34. prediction = trained_model.predict(input_bow_csv)
  35. return prediction
  36.  
  37.  
  38. def process_input_text(word_dict, input_data):
  39.  
  40. words = text_to_words(input_data);
  41. vectorizer = CountVectorizer(preprocessor=lambda x: x, tokenizer=lambda x: x, word_dict)
  42. bow_array = vectorizer.transform([words]).toarray()[0]
  43. bow_csv = ",".join(str(bit) for bit in bow_array)
  44. return bow_csv
  45.  
  46. def text_to_words(text):
  47. """
  48. Uses the Porter Stemmer to stem words in a review
  49. """
  50. #instantiate stemmer
  51. stemmer = PorterStemmer()
  52. text_nohtml = BeautifulSoup(text, "html.parser").get_text() # Remove HTML tags
  53. text_lower = re.sub(r"[^a-zA-Z0-9]", " ", text_nohtml.lower()) # Convert to lower case
  54. words = text_lower.split() # Split string into words
  55. words = [w for w in words if w not in stopwords.words("english")] # Remove stopwords
  56. words = [PorterStemmer().stem(w) for w in words] # stem
  57. return words
  58.  
  59. def input_fn(input_data, content_type):
  60. return input_data;
  61.  
  62. def output_fn(prediction_output, accept):
  63. return prediction_output;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement