Advertisement
Python253

nltk_nlp_program

Mar 8th, 2024 (edited)
670
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.82 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # Filename: nltk_nlp_program.py
  4. # Author: Jeoi Reqi
  5.  
  6. """
  7. Welcome to the NLTK NLP Tool!
  8.  
  9. This interactive script provides a Natural Language Processing (NLP) tool powered by the Natural Language Toolkit (NLTK).
  10. It offers various text analysis functionalities through a user-friendly menu.
  11.  
  12. Requirements:
  13. - Python 3
  14. - NLTK library with 'punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words', and 'vader_lexicon' resources
  15.  
  16. Usage:
  17. 1. Run the script.
  18. 2. Follow the menu prompts to select the desired analysis.
  19. 3. Enter the text for analysis.
  20.  
  21. Menu Options:
  22. 1. Tokenization: Breaks text into words and sentences.
  23. 2. Part-of-Speech Tagging: Identifies grammatical parts of speech for each word.
  24. 3. Named Entity Recognition: Identifies entities such as persons, organizations, and locations.
  25. 4. Sentiment Analysis: Determines sentiment polarity (negative, neutral, positive).
  26.  
  27. Examples:
  28.  
  29. 1. Tokenization
  30. Enter the text: Hello, World!
  31. Tokenized Words: ['Hello', ',', 'World', '!']
  32. Tokenized Sentences: ['Hello, World!']
  33.  
  34. 2. Part-of-Speech Tagging
  35. Enter the text: Hello, World!
  36. Part-of-Speech Tags: [('Hello', 'NNP'), (',', ','), ('World', 'NNP'), ('!', '.')]
  37.  
  38. 3. Named Entity Recognition
  39. Enter the text: Hello, World!
  40. Named Entity Recognition: (S (GPE Hello/NNP) ,/, (PERSON World/NNP) !/.)
  41.  
  42. 4. Sentiment Analysis
  43. Enter the text: Hello, World!
  44. Sentiment Analysis: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
  45.  
  46.  
  47. Enjoy exploring the world of Natural Language Processing with NLTK!
  48. """
  49.  
  50. # Imports
  51. import nltk
  52. from nltk.tokenize import word_tokenize, sent_tokenize
  53. from nltk import pos_tag, ne_chunk
  54. from nltk.sentiment import SentimentIntensityAnalyzer
  55.  
  56. # Downloads
  57. nltk.download('punkt')
  58. nltk.download('averaged_perceptron_tagger')
  59. nltk.download('maxent_ne_chunker')
  60. nltk.download('vader_lexicon')
  61.  
  62. # Tokenization Function
  63. def tokenize_nltk(text):
  64.     """
  65.    Tokenizes the input text into words and sentences.
  66.  
  67.    Parameters:
  68.    - text (str): Input text for tokenization.
  69.  
  70.    Returns:
  71.    Tuple(List[str], List[str]): Tokenized words and sentences.
  72.    """
  73.     words = word_tokenize(text)
  74.     sentences = sent_tokenize(text)
  75.     return words, sentences
  76.  
  77. # Part-of-Speech Tagging Function
  78. def pos_tagging_nltk(text):
  79.     """
  80.    Performs part-of-speech tagging on the input text.
  81.  
  82.    Parameters:
  83.    - text (str): Input text for part-of-speech tagging.
  84.  
  85.    Returns:
  86.    List[Tuple[str, str]]: Part-of-speech tags for each word.
  87.    """
  88.     words = word_tokenize(text)
  89.     pos_tags = pos_tag(words)
  90.     return pos_tags
  91.  
  92. # Named Entity Recognition Function
  93. def named_entity_recognition_nltk(text):
  94.     """
  95.    Identifies named entities in the input text.
  96.  
  97.    Parameters:
  98.    - text (str): Input text for named entity recognition.
  99.  
  100.    Returns:
  101.    nltk.tree.Tree: Named entity recognition result.
  102.    """
  103.     words = word_tokenize(text)
  104.     pos_tags = pos_tag(words)
  105.     ner_result = ne_chunk(pos_tags)
  106.     return ner_result
  107.  
  108. # Sentiment Analysis Function
  109. def sentiment_analysis_nltk(text):
  110.     """
  111.    Analyzes the sentiment of the input text.
  112.  
  113.    Parameters:
  114.    - text (str): Input text for sentiment analysis.
  115.  
  116.    Returns:
  117.    Dict[str, float]: Sentiment scores (negative, neutral, positive, compound).
  118.    """
  119.     sia = SentimentIntensityAnalyzer()
  120.     sentiment_score = sia.polarity_scores(text)
  121.     return sentiment_score
  122.  
  123. # Menu & Options Function
  124. def main_nltk():
  125.     """
  126.    Main function for the NLTK NLP tool, providing a user-friendly menu for text analysis.
  127.    """
  128.     print("Welcome to the NLTK NLP Tool!")
  129.     # Menu Printing Function
  130.     while True:
  131.         print("\nMenu:")
  132.         print("1. Tokenization")
  133.         print("2. Part-of-Speech Tagging")
  134.         print("3. Named Entity Recognition")
  135.         print("4. Sentiment Analysis")
  136.         print("0. Exit")
  137.  
  138.         choice = input("Enter your choice (0-4): ")
  139.  
  140.         if choice == '0':
  141.             print("Exiting the NLTK NLP Tool. Goodbye!")
  142.             break
  143.  
  144.         text = input("Enter the text: ")
  145.  
  146.         if choice == '1':
  147.             words, sentences = tokenize_nltk(text)
  148.             print("Tokenized Words:", words)
  149.             print("Tokenized Sentences:", sentences)
  150.  
  151.         elif choice == '2':
  152.             pos_tags = pos_tagging_nltk(text)
  153.             print("Part-of-Speech Tags:", pos_tags)
  154.  
  155.         elif choice == '3':
  156.             ner_result = named_entity_recognition_nltk(text)
  157.             print("Named Entity Recognition:", ner_result)
  158.  
  159.         elif choice == '4':
  160.             sentiment_score = sentiment_analysis_nltk(text)
  161.             print("Sentiment Analysis:", sentiment_score)
  162.  
  163.         else:
  164.             print("Invalid choice. Please enter a number between 0 and 4.")
  165.  
# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main_nltk()
  168.  
  169.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement