Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import print_function, unicode_literals
- import random
- import logging
- import os
- os.environ['NLTK_DATA'] = os.getcwd() + '/nltk_data'
- from textblob import TextBlob
- from config import FILTER_WORDS
# Module-wide logger: this is the root logger, opened up to DEBUG so the
# logger.info() traces emitted throughout this file are not filtered out.
logger = logging.getLogger()
logging.basicConfig()
logger.setLevel(logging.DEBUG)
# start:example-hello.py
# Words that count as a greeting when one of them appears in the user's input.
# NOTE(review): matching is done per word, so the two-word entry "what's up"
# only fires if a single token equals it -- confirm against the tokenizer.
GREETING_KEYWORDS = ("hello", "hi", "greetings", "sup", "what's up",)
# Replies to choose from once a greeting has been spotted
GREETING_RESPONSES = ["'sup ", "hey", "*nods*", "yo"]


def check_for_greeting(sentence):
    """Return a random greeting reply if any word of the input is a greeting.

    ``sentence`` must expose a ``words`` iterable of strings. Each word is
    lower-cased and looked up in GREETING_KEYWORDS; on the first hit a reply
    is drawn from GREETING_RESPONSES. Returns None when nothing matches.
    """
    greeted = any(token.lower() in GREETING_KEYWORDS for token in sentence.words)
    if not greeted:
        return None
    return random.choice(GREETING_RESPONSES)
# start:example-none.py
# Fallback replies, used when nothing usable could be pulled out of the input
NONE_RESPONSES = [
    "uh whatever",
    "meet me at the wall?",
    "code hard",
]
# end
# start:example-self.py
# Canned replies for when the user talks about the bot but gives us no verb
# to build a sentence from
COMMENTS_ABOUT_SELF = [
    "You're just jealous",
    "I worked really hard on that",
]
# end
class UnacceptableUtteranceException(Exception):
    """Signals that a candidate response tripped the blacklist and must not be sent.

    It is raised and deliberately left uncaught inside this module.
    """
def starts_with_vowel(word):
    """Check for article compatibility -- 'a' vs. 'an'.

    Returns True when ``word`` begins with one of the letters a, e, i, o, u
    in either case, and False otherwise (including for an empty string).
    """
    # Slice instead of indexing so an empty string yields '' rather than
    # raising IndexError; compare against a tuple because '' is "in" any str.
    return word[:1].lower() in ('a', 'e', 'i', 'o', 'u')
def chatbot(sentence):
    """Entry point: log the incoming sentence and return the reply chosen by respond()."""
    logger.info("chatbot: respond to %s", sentence)
    return respond(sentence)
# start:example-pronoun.py
def find_pronoun(sent):
    """Choose the pronoun the reply should use, based on pronouns in the input.

    Walks ``sent.pos_tags`` (pairs of word and part-of-speech tag) and looks
    only at tokens tagged 'PRP':

    * "you" in any casing flips to 'I' (the user is talking about the bot);
    * a capital 'I' flips to 'You' (the user is talking about themselves).

    The whole sentence is scanned, so when several such pronouns occur the
    last one decides the result. Returns None if none is found.
    """
    chosen = None
    for token, tag in sent.pos_tags:
        if tag != 'PRP':
            continue
        # Disambiguate pronouns by swapping the point of view
        if token.lower() == 'you':
            chosen = 'I'
        elif token == 'I':
            chosen = 'You'
    return chosen
# end
def find_verb(sent):
    """Pick a candidate verb for the sentence.

    Returns a ``(word, tag)`` pair for the first entry of ``sent.pos_tags``
    whose tag starts with 'VB', or ``(None, None)`` when there is no verb.
    """
    for token, tag in sent.pos_tags:
        if tag.startswith('VB'):  # Any verb form: VB, VBD, VBP, VBZ, ...
            return token, tag
    return None, None
def find_noun(sent):
    """Given a sentence, find the best candidate noun.

    The candidate is the first word of ``sent.pos_tags`` tagged exactly 'NN'.
    A hit is logged at INFO level. Returns None when there is no such word.
    """
    candidate = next((token for token, tag in sent.pos_tags if tag == 'NN'), None)
    if candidate:
        logger.info("Found noun: %s", candidate)
    return candidate
def find_adjective(sent):
    """Given a sentence, find the best candidate adjective.

    Returns the first word of ``sent.pos_tags`` tagged exactly 'JJ', or None
    when the sentence has no such word.
    """
    return next((token for token, tag in sent.pos_tags if tag == 'JJ'), None)
# start:example-construct-response.py
def construct_response(pronoun, noun, verb):
    """Build a reply sentence that reuses as much of the user's input as possible.

    Used when no special case matched.

    Args:
        pronoun: pronoun to open the reply with, or None to omit it.
        noun: noun to mention (prefixed with "a"/"an"), or None to omit it.
        verb: ``(word, tag)`` pair as produced by find_verb(), or None. A pair
            whose word is None is treated the same as no verb.

    Returns:
        The space-joined reply, always ending with a randomly chosen filler
        word (which may be the empty string).
    """
    resp = []

    if pronoun:
        resp.append(pronoun)

    # We always respond in the present tense, and the pronoun will always either be a passthrough
    # from the user, or 'you' or 'I', in which case we might need to change the tense for some
    # irregular verbs.
    # find_verb() reports "no verb" as (None, None), which is truthy as a tuple,
    # so decide on the word itself rather than on the pair.
    verb_word = verb[0] if verb else None
    if verb_word in ('be', 'am', 'is', "'m"):  # This would be an excellent place to use lemmas!
        # Guard the pronoun: it may be None, and None has no .lower()
        if pronoun and pronoun.lower() == 'you':
            # The bot will always tell the person they aren't whatever they said they were
            resp.append("aren't really")
        else:
            resp.append(verb_word)

    if noun:
        article = "an" if starts_with_vowel(noun) else "a"
        resp.append(article + " " + noun)

    resp.append(random.choice(("tho", "bro", "lol", "bruh", "smh", "")))

    return " ".join(resp)
# end
# start:example-check-for-self.py
def check_for_comment_about_bot(pronoun, noun, adjective):
    """Produce a boastful reply when the user's input was about the bot itself.

    A reply is only built when ``pronoun`` is 'I' and a noun or an adjective
    is available. With a noun, a coin flip decides between a template that
    takes the pluralized, capitalized noun and one that takes the noun as
    given; without a noun, an adjective template is used. ``noun`` must
    provide a ``pluralize()`` method.

    Returns the formatted sentence, or None when the input was not about the bot.
    """
    if pronoun != 'I' or not (noun or adjective):
        return None

    if not noun:
        return random.choice(SELF_VERBS_WITH_ADJECTIVE).format(adjective=adjective)

    if random.choice((True, False)):
        template = random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL)
        return template.format(noun=noun.pluralize().capitalize())

    return random.choice(SELF_VERBS_WITH_NOUN_LOWER).format(noun=noun)


# Templates filled with the pluralized, capitalized noun
SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
    "My last startup totally crushed the {noun} vertical",
    "Were you aware I was a serial entrepreneur in the {noun} sector?",
    "My startup is Uber for {noun}",
    "I really consider myself an expert on {noun}",
]

# Templates filled with the noun exactly as the user wrote it
SELF_VERBS_WITH_NOUN_LOWER = [
    "Yeah but I know a lot about {noun}",
    "My bros always ask me about {noun}",
]

# Templates filled with an adjective when no noun is available
SELF_VERBS_WITH_ADJECTIVE = [
    "I'm personally building the {adjective} Economy",
    "I consider myself to be a {adjective}preneur",
]
# end
def preprocess_text(sentence):
    """Patch up input quirks that would otherwise confuse the parser.

    A lower-case 'i' (and "i'm") is capitalized so that it can be identified
    as a pronoun. The text is split and re-joined on single spaces, so all
    other tokens and the spacing between them are left untouched.
    """
    capitalized = {'i': 'I', "i'm": "I'm"}
    return ' '.join(capitalized.get(token, token) for token in sentence.split(' '))
# start:example-respond.py
def respond(sentence):
    """Parse the user's inbound sentence and find candidate terms that make up a best-fit response.

    The reply is chosen in this order:

    1. a boast, if the input was about the bot (check_for_comment_about_bot);
    2. a greeting, if the input contained one (check_for_greeting);
    3. otherwise a fallback line, a canned comment about the bot, or a
       sentence constructed from the extracted pronoun, noun and verb.

    Args:
        sentence: the raw text typed by the user.

    Returns:
        The reply string.

    Raises:
        UnacceptableUtteranceException: raised by filter_response() when the
            chosen reply matches the filter list.
    """
    cleaned = preprocess_text(sentence)
    parsed = TextBlob(cleaned)

    # Extract the candidate parts of speech from the (possibly multi-sentence) input
    pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)

    # If we said something about the bot and used some kind of direct noun, construct the
    # sentence around that, discarding the other candidates
    resp = check_for_comment_about_bot(pronoun, noun, adjective)

    # If we just greeted the bot, we'll use a return greeting
    if not resp:
        resp = check_for_greeting(parsed)

    if not resp:
        # If we didn't override the final sentence, try to construct a new one.
        # The verb arrives as a (word, tag) pair, and "no verb" is (None, None),
        # which is truthy as a tuple -- so test the word, not the pair.
        has_verb = bool(verb and verb[0])
        if not pronoun:
            resp = random.choice(NONE_RESPONSES)
        elif pronoun == 'I' and not has_verb:
            resp = random.choice(COMMENTS_ABOUT_SELF)
        else:
            resp = construct_response(pronoun, noun, verb)

    # If we got through all that with nothing, use a random response
    if not resp:
        resp = random.choice(NONE_RESPONSES)

    logger.info("Returning phrase '%s'", resp)
    # Check that we're not going to say anything obviously offensive
    filter_response(resp)

    return resp
def find_candidate_parts_of_speech(parsed):
    """Extract the pronoun, noun, adjective and verb to build a reply from.

    Every sentence in ``parsed.sentences`` is analysed in turn and each result
    replaces the previous one, so the values returned are those of the last
    sentence. The outcome is logged at INFO level.

    Returns a tuple ``(pronoun, noun, adjective, verb)``. All four are None
    when there are no sentences; otherwise ``verb`` is whatever find_verb()
    returned and the other three may each be None if nothing matched.
    """
    pronoun = noun = adjective = verb = None

    for sent in parsed.sentences:
        pronoun, noun, adjective, verb = (
            find_pronoun(sent),
            find_noun(sent),
            find_adjective(sent),
            find_verb(sent),
        )

    logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", pronoun, noun, adjective, verb)
    return pronoun, noun, adjective, verb
# end
# start:example-filter.py
def filter_response(resp):
    """Reject a reply that contains anything on our filter list.

    The reply is split on single spaces and each word is checked in turn.
    A word is unacceptable if it contains '@', '#' or '!', or if its
    lower-cased form starts with any entry of FILTER_WORDS.

    Raises:
        UnacceptableUtteranceException: at the first unacceptable word.
    """
    for token in resp.split(' '):
        if any(mark in token for mark in ('@', '#', '!')):
            raise UnacceptableUtteranceException()
        if any(token.lower().startswith(banned) for banned in FILTER_WORDS):
            raise UnacceptableUtteranceException()
# end
if __name__ == '__main__':
    import sys
    # Usage:
    # python chatbot.py "I am an engineer"
    # sys.argv[0] is the script name, so the user's sentence is only present
    # when there are at least two entries; otherwise use the default sentence.
    if len(sys.argv) > 1:
        saying = sys.argv[1]
    else:
        saying = "How are you, bot?"
    print(chatbot(saying))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement