Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from re import split
def process_line(words, word_dict):
    """Add the occurrences of each item in ``words`` to ``word_dict``.

    Args:
        words: Iterable of hashable items (here, words from one line).
        word_dict: Mapping of word -> running count; updated in place.

    Returns:
        None. The tally lives in ``word_dict``.
    """
    for word in words:
        # ``get`` with a default covers first sightings without a
        # separate membership test.
        word_dict[word] = word_dict.get(word, 0) + 1
def process_dict(word_dict):
    """Return the dict's entries as ``(count, word)`` tuples, sorted ascending.

    Putting the count first makes the natural tuple ordering sort by
    frequency, with ties broken alphabetically by word.

    Args:
        word_dict: Mapping of word -> count.

    Returns:
        A new list of ``(count, word)`` tuples in ascending order.
    """
    return sorted((count, word) for word, count in word_dict.items())
def format_print(input_list, reverse, word_num):
    """Print a 35-column word/count table.

    Output layout: a blank line, a centred ``[Unique Words: N]`` title padded
    with ``=``, a dashed rule, the column header, another dashed rule, then
    one ``word | count`` row per entry.

    Args:
        input_list: Sequence of ``(count, word)`` tuples.
        reverse: When true, rows are printed in descending order; otherwise
            they are printed in the order given.
        word_num: Number shown in the title line.
    """
    # Sort a copy so the caller's list is left untouched.
    rows = sorted(input_list, reverse=True) if reverse else list(input_list)
    rule = "-" * 35
    lines = [
        "",
        ("[Unique Words: " + str(word_num) + "]").center(35, "="),
        rule,
        "%-16s %s %16s" % ("Word", "|", "Count"),
        rule,
    ]
    lines.extend("%-16s %s %16d" % (word, "|", count) for count, word in rows)
    # One single-argument print call behaves the same on Python 2 and 3.
    print("\n".join(lines))
def word_count(_file, max_to_min=False):
    """Tally the words in ``_file`` and print them as a frequency table.

    Words are maximal runs of ASCII letters and apostrophes, lower-cased.
    Counting is delegated to ``process_line``, ordering to ``process_dict``
    and output to ``format_print``.

    Args:
        _file: Path of the text file to read.
        max_to_min: Passed to ``format_print`` as its ``reverse`` flag.
    """
    word_dict = {}
    # Plain text mode: the "U" flag is rejected by Python 3.11+, and the
    # regex split already discards any stray "\r".
    with open(_file) as txt:
        for line in txt:
            # Whitespace-only lines contain no words.
            if line.strip():
                words = filter(None, split(r"[^a-zA-Z']+", line.lower()))
                process_line(words, word_dict)
    final_list = process_dict(word_dict)
    format_print(final_list, max_to_min, len(word_dict))
# Run the report for the sample text, passing True for ``max_to_min``.
word_count("Gettysburg.txt", True)
def word_count(_file, max_to_min=False):
    """Read ``_file``, count its words, and print the resulting table.

    Each line is lower-cased and split on anything that is not an ASCII
    letter or apostrophe; the non-empty pieces are handed to
    ``process_line``. The totals are then ordered by ``process_dict`` and
    printed by ``format_print``.

    Args:
        _file: Path of the text file to read.
        max_to_min: Passed to ``format_print`` as its ``reverse`` flag.
    """
    word_dict = {}
    # ``with`` closes the file even if processing a line raises.
    with open(_file) as txt:
        for line in txt:
            if not line.strip():
                continue  # nothing to count on a blank line
            pieces = split(r"[^a-zA-Z']+", line.lower())
            process_line(filter(None, pieces), word_dict)
    final_list = process_dict(word_dict)
    format_print(final_list, max_to_min, len(word_dict))
def word_count(filename, max_to_min=False):
    """Count the words in ``filename`` and print a frequency table.

    Words are lower-cased runs of ASCII letters and apostrophes. The tally
    is built with ``process_line``, ordered with ``process_dict`` and
    printed with ``format_print``.

    Args:
        filename: Path of the text file to read.
        max_to_min: Passed to ``format_print`` as its ``reverse`` flag.
    """
    word_dict = {}
    # No "U" in the mode: Python 3.11+ raises ValueError for it.
    with open(filename) as f:
        for line in f:
            # Skip lines that are only whitespace.
            if line.strip():
                words = filter(None, split(r"[^a-zA-Z']+", line.lower()))
                process_line(words, word_dict)
    final_list = process_dict(word_dict)
    format_print(final_list, max_to_min, len(word_dict))
def process_line(words, word_dict):
    """Increment ``word_dict[word]`` once for every ``word`` in ``words``.

    Args:
        words: Iterable of hashable items to count.
        word_dict: Mapping of word -> count; mutated in place.

    Returns:
        None.
    """
    for word in words:
        # Missing keys start from zero.
        word_dict[word] = word_dict.get(word, 0) + 1
- from collections import Counter
- .
- .
- .
def word_count(filename, max_to_min=False):
    """Count the words in ``filename`` with a ``Counter`` and print a table.

    Words are lower-cased runs of ASCII letters and apostrophes. The counter
    is ordered by ``process_dict`` and printed by ``format_print``.

    Args:
        filename: Path of the text file to read.
        max_to_min: Passed to ``format_print`` as its ``reverse`` flag.
    """
    counter = Counter()
    # No "U" in the mode: Python 3.11+ raises ValueError for it.
    with open(filename) as f:
        for line in f:
            # Skip lines that are only whitespace.
            if line.strip():
                counter.update(filter(None, split(r"[^a-zA-Z']+", line.lower())))
    final_list = process_dict(counter)
    format_print(final_list, max_to_min, len(counter))
- from collections import Counter
- .
- .
- .
def word_count(filename, max_to_min=False):
    """Count the words in ``filename`` with a ``Counter`` and print a table.

    Each line is stripped and lower-cased first; empty lines are skipped.
    Words are runs of ASCII letters and apostrophes. The counter is ordered
    by ``process_dict`` and printed by ``format_print``.

    Args:
        filename: Path of the text file to read.
        max_to_min: Passed to ``format_print`` as its ``reverse`` flag.
    """
    counter = Counter()
    # No "U" in the mode: Python 3.11+ raises ValueError for it.
    with open(filename) as f:
        for line in f:
            line = line.strip().lower()
            if not line:
                continue
            # ``filter(None, ...)`` drops the empty strings that split()
            # yields at the edges of the line.
            counter.update(filter(None, split(r"[^a-zA-Z']+", line)))
    final_list = process_dict(counter)
    format_print(final_list, max_to_min, len(counter))
- from collections import Counter
- .
- .
- .
def word_count(filename, max_to_min=False):
    """Count the words in ``filename`` with a ``Counter`` and print a table.

    Each line is stripped and lower-cased first; empty lines are skipped.
    Words are runs of ASCII letters and apostrophes. The counter is ordered
    by ``process_dict`` and printed by ``format_print``.

    Args:
        filename: Path of the text file to read.
        max_to_min: Passed to ``format_print`` as its ``reverse`` flag.
    """
    counter = Counter()
    # No "U" in the mode: Python 3.11+ raises ValueError for it.
    with open(filename) as f:
        for line in f:
            line = line.strip().lower()
            if not line:
                continue
            # The ``if x`` guard drops the empty strings split() produces.
            counter.update(x for x in split(r"[^a-zA-Z']+", line) if x)
    final_list = process_dict(counter)
    format_print(final_list, max_to_min, len(counter))
def process_dict(word_dict):
    """Convert a word -> count mapping into a sorted ``(count, word)`` list.

    Args:
        word_dict: Mapping of word -> count.

    Returns:
        A new list of ``(count, word)`` tuples, ascending by count and then
        by word.
    """
    temp_list = [(value, key) for key, value in word_dict.items()]
    temp_list.sort()
    return temp_list
def process_dict(counter):
    """Return the counter's entries as sorted ``(count, word)`` tuples.

    Args:
        counter: Mapping of word -> count (for example a ``Counter``).

    Returns:
        A new list of ``(count, word)`` tuples, ascending by count and then
        by word.
    """
    # Unpacking in the generator replaces the tuple-parameter lambda, which
    # Python 3 rejects; ``sorted`` also copes with items() being a view.
    return sorted((count, word) for word, count in counter.items())
- from collections import Counter
- from re import split
BANNER = "-" * 35


def format_print(counter, is_reverse=False):
    """Print the counter as a 35-column word/count table.

    Rows are ordered by ``(count, word)``: ascending by default, descending
    when ``is_reverse`` is true. The title line shows the number of
    distinct words.

    Args:
        counter: Mapping of word -> count.
        is_reverse: Print rows in descending order when true.
    """
    # An index-based key replaces the tuple-parameter lambda, which
    # Python 3 rejects; ``sorted`` accepts the items() view directly.
    lst = sorted(
        counter.items(),
        key=lambda item: (item[1], item[0]),
        reverse=is_reverse,
    )
    print(("[Unique Words: %d]" % len(lst)).center(35, "="))
    print("%-16s | %16s" % ("Word", "Count"))
    print(BANNER)
    for word, count in lst:
        print("%-16s | %16d" % (word, count))
def count_words(filename):
    """Return a ``Counter`` of the lower-cased words in ``filename``.

    A word is a maximal run of ASCII letters and apostrophes; everything
    else acts as a separator.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``Counter`` mapping each word to its number of occurrences.
    """
    counter = Counter()
    # No "U" in the mode: Python 3.11+ raises ValueError for it.
    with open(filename) as f:
        for line in f:
            # Blank lines only produce empty strings, which the ``if``
            # guard removes, so no separate blank-line check is needed.
            counter.update(
                word for word in split(r"[^a-zA-Z']+", line.lower()) if word
            )
    return counter
# Count the words in the sample text and print them in ascending order.
format_print(count_words("Gettysburg.txt"), is_reverse=False)
# Illustrative fragment: ``_file`` is not defined here and the body is a
# placeholder. The "U" mode flag is omitted because Python 3.11+ rejects it.
with open(_file) as src:
    ...
inputfile = input("Enter the name (with file extension) of the file you would like to spellcheck: ")
# ``with`` closes the file even if reading fails part-way through.
with open(inputfile, 'rt') as fileToCheck:
    print("File found.")
    textToCheck = []
    for line in fileToCheck:
        # split() with no arguments breaks the line on any whitespace.
        textToCheck.extend(line.split())
print("File imported.")
# Report how many words were collected.
print(str(len(textToCheck)) + " words found in input file.")
- import os
# Single-quoted pieces let the attribute values keep their double quotes;
# the original double-quoted literal was cut short by them.
outputText = (
    '<html>\n<head>\n<title>Document</title>\n'
    '<style>insert stuff here</style>\n'
    '<link rel="stylesheet" href="linktocss.css">\n'
    '</head>\n<h1>Document</h1>\n<body>'
)
filename = inputfile + ".html"
outputText += "</body></html>"  # finishes off html
# ``with`` flushes and closes the file before it is opened externally.
with open(filename, 'wt') as outputFile:
    outputFile.write(outputText)
# NOTE: os.startfile is only available on Windows.
os.startfile(filename)  # automatically open file
import re
from collections import Counter

# Raw string so the backslashes in the Windows path are taken literally.
with open(r'C:\Python27\myfile.txt', 'r') as f:
    passage = f.read()
# \w+ matches each run of word characters.
words = re.findall(r'\w+', passage)
# Converting to uppercase so that words like 'Is' and 'is' are counted as
# the same word.
cap_words = [word.upper() for word in words]
word_counts = Counter(cap_words)
print(word_counts)
- Hello
Add Comment
Please, Sign In to add comment