# Natural Language Toolkit (NLTK) example

import nltk;
from urllib import urlopen;
# Download a news article, strip its HTML markup, and split the remaining
# text into word tokens for the tagging examples.
url = "http://news.bbc.co.uk/2/hi/health/2284783.stm"
response = urlopen(url)
try:
    html = response.read()
finally:
    # Close the HTTP response explicitly rather than relying on garbage
    # collection of a temporary object to release the connection.
    response.close()

# NOTE(review): nltk.clean_html exists only in NLTK 2.x; in NLTK 3 it raises
# NotImplementedError and recommends an HTML parser such as BeautifulSoup.
# Confirm which NLTK version this script targets.
raw = nltk.clean_html(html)

text = nltk.word_tokenize(raw)

# f = open ("C:\\folder\\file.txt");
# f.readline();

from nltk.corpus import brown;
#from nltk.corpus import mac_morpho; #PT-BR
# Tagged sentences from the "news" category of the Brown corpus; these serve
# as training data for a tagger.
train_corpus = brown.tagged_sents(categories="news")

# Keep only the tag from every (word, tag) pair in the same category.
tags = [
    tag
    for word, tag in brown.tagged_words(categories="news")
]

# Example: FreqDist counts how often each tag occurs and max() returns the
# most frequent one. The value is echoed only in an interactive session.
nltk.FreqDist(tags).max()

# Example of how tagging works: a DefaultTagger assigns the same tag to every
# token, so this one classifies everything as a noun ('NN').
default_tagger = nltk.DefaultTagger('NN')
default_tagger.tag(text)

# Unigram tagger trained on train_corpus. Tokens with no match in the
# training corpus are tagged by the backoff tagger t0, i.e. as 'NN'.
t0 = nltk.DefaultTagger('NN')
t1 = nltk.UnigramTagger(train_corpus, backoff=t0)
t1.tag(text)