# Untitled

import nltk
from nltk.tokenize import word_tokenize
from nltk.text import Text

# Fetch the tokenizer models that word_tokenize() relies on.  Older NLTK
# releases load the pickled "punkt" models, while NLTK 3.8.2 and later look
# for the "punkt_tab" resource instead; requesting both keeps the script
# working across versions (an already-installed package is simply skipped).
nltk.download('punkt')
nltk.download('punkt_tab')

# Sample sentence used to demonstrate tokenization.
string = "It is the branch of data science that consists of systematic processes for analyzing, understanding, and how to driving information from the text data in a smart and efficient manner."

# Split the sentence into word and punctuation tokens and show the raw result.
tokens = word_tokenize(string)
print(tokens)

# Lower-case every token so later comparisons are case-insensitive.
tokens = [word.lower() for word in tokens]

# A bare `tokens[:5]` is only echoed inside a notebook/REPL; print it so the
# preview of the first five tokens also appears when run as a plain script.
print(tokens[:5])