Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# NOTE(review): this call names "predict.py" as its entry point, so it looks
# like deployment-side code (e.g. a notebook cell) rather than part of the
# inference script that follows -- confirm where it belongs. SKLearnModel is
# not imported anywhere in the visible part of this file.
sklearn_model = SKLearnModel(
    model_data="s3://bucket/model.tar.gz",
    role="SageMakerRole",
    entry_point="predict.py",
)
- import os
- import re
- import pickle
- import numpy as np
- import pandas as pd
- import nltk
- nltk.download("stopwords")
- from nltk.corpus import stopwords
- from nltk.stem.porter import *
- from bs4 import BeautifulSoup
- import sagemaker_containers
- from sklearn.feature_extraction.text import CountVectorizer
def model_fn(model_dir):
    """Load the trained model and its vocabulary from ``model_dir``.

    Args:
        model_dir: Directory that holds the pickled model artifacts.

    Returns:
        A ``(model, word_dict)`` tuple, which is the shape ``predict_fn``
        unpacks.

    Raises:
        FileNotFoundError: If either artifact is missing from ``model_dir``.
    """
    # NOTE(review): the artifact file names are assumptions -- the original
    # body only contained TODOs. Confirm them against whatever the training
    # job actually writes into model.tar.gz.
    word_dict_path = os.path.join(model_dir, "word_dict.pkl")
    model_path = os.path.join(model_dir, "model.pkl")

    # SECURITY: pickle.load can execute arbitrary code. Only point this at
    # artifacts produced by a trusted training job.
    with open(word_dict_path, "rb") as f:
        word_dict = pickle.load(f)
    with open(model_path, "rb") as f:
        model = pickle.load(f)

    return model, word_dict
def predict_fn(input_data, model):
    """Run the loaded model on one piece of raw input text.

    Args:
        input_data: Raw text to classify, as returned by ``input_fn``.
        model: The ``(trained_model, word_dict)`` pair returned by
            ``model_fn``.

    Returns:
        Whatever ``trained_model.predict`` returns for the single sample.

    Raises:
        RuntimeError: If ``word_dict`` is ``None``, i.e. the model was not
            loaded properly.
    """
    print('Inferring sentiment of input data.')

    trained_model, word_dict = model
    if word_dict is None:
        # RuntimeError is still an Exception, so existing handlers keep working.
        raise RuntimeError('Model has not been loaded properly, no word_dict.')

    # Process input_data so that it is ready to be sent to our model.
    input_bow_csv = process_input_text(word_dict, input_data)

    # process_input_text yields a comma-separated string of counts. Passing
    # that string straight to predict() fails for scikit-learn estimators,
    # which expect a 2D (n_samples, n_features) array -- so rebuild a
    # one-row matrix from it.
    # NOTE(review): assumes trained_model follows the scikit-learn predict
    # contract -- confirm against the training script.
    counts = [int(count) for count in input_bow_csv.split(",")]
    features = np.array(counts).reshape(1, -1)

    return trained_model.predict(features)
def process_input_text(word_dict, input_data):
    """Turn raw text into a comma-separated bag-of-words count string.

    Args:
        word_dict: Vocabulary handed to ``CountVectorizer`` as its fixed
            ``vocabulary``.
        input_data: Raw text; tokenized with ``text_to_words``.

    Returns:
        A string of the per-vocabulary-entry counts joined by commas.
    """
    words = text_to_words(input_data)

    # The text is already tokenized, so the preprocessor and tokenizer are
    # identity functions. The vocabulary must be passed by keyword: the
    # original positional form after keyword arguments was a SyntaxError.
    vectorizer = CountVectorizer(
        preprocessor=lambda x: x,
        tokenizer=lambda x: x,
        vocabulary=word_dict,
    )

    # transform() takes a batch of documents; wrap the single token list and
    # take row 0 of the dense result.
    bow_array = vectorizer.transform([words]).toarray()[0]
    return ",".join(str(bit) for bit in bow_array)
def text_to_words(text):
    """Convert raw review text into a list of stemmed tokens.

    HTML markup is stripped, the text is lower-cased, every character that
    is not a letter or digit is replaced by a space, English stopwords are
    dropped, and the remaining words are stemmed with the Porter stemmer.

    Args:
        text: Raw review text, possibly containing HTML.

    Returns:
        List of stemmed words, in their original order.
    """
    # Build these once per call instead of once per word.
    stemmer = PorterStemmer()
    english_stopwords = set(stopwords.words("english"))

    text_nohtml = BeautifulSoup(text, "html.parser").get_text()  # Remove HTML tags
    # Lower-case, then blank out anything that is not alphanumeric.
    text_clean = re.sub(r"[^a-zA-Z0-9]", " ", text_nohtml.lower())

    return [
        stemmer.stem(word)
        for word in text_clean.split()
        if word not in english_stopwords
    ]
def input_fn(input_data, content_type):
    """Return the request payload unchanged.

    Args:
        input_data: Incoming request body.
        content_type: Declared content type of the request; accepted but
            not inspected.

    Returns:
        ``input_data`` exactly as received.
    """
    return input_data
def output_fn(prediction_output, accept):
    """Return the prediction unchanged.

    Args:
        prediction_output: Value produced by ``predict_fn``.
        accept: Requested response content type; accepted but not
            inspected.

    Returns:
        ``prediction_output`` exactly as received.
    """
    return prediction_output
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement