Guest User

Untitled

a guest
Sep 21st, 2018
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.49 KB | None | 0 0
  1. from re import split
  2.  
  3.  
  4. def process_line(words, word_dict):
  5. for word in words:
  6. if word in word_dict:
  7. word_dict[word] += 1
  8. else:
  9. word_dict[word] = 1
  10.  
  11.  
  12. def process_dict(word_dict):
  13. temp_list = []
  14. for key, value in word_dict.items():
  15. temp_list.append((value, key))
  16.  
  17. temp_list.sort()
  18. return temp_list
  19.  
  20.  
  21. def format_print(input_list, reverse, word_num):
  22. if reverse:
  23. input_list.sort(reverse=True)
  24.  
  25. print "n", ("[Unique Words: " + str(word_num) + "]").center(35, "=")
  26. print "-"*35 + "n", "%-16s %s %16s" % ("Word", "|", "Count"), "n", "-"*35
  27. for count, word in input_list:
  28. print "%-16s %s %16d" % (word, "|", count)
  29.  
  30.  
  31. def word_count(_file, max_to_min=False):
  32. txt = open(_file, "rU")
  33. word_dict = {}
  34. for line in txt:
  35. if line.replace(" ", "") != ("n" or None):
  36. process_line(filter(None, split("[^a-zA-Z']+", line.lower())), word_dict)
  37.  
  38. txt.close()
  39. final_list = process_dict(word_dict)
  40. format_print(final_list, max_to_min, len(word_dict))
  41.  
  42.  
  43. word_count("Gettysburg.txt", True)
  44.  
  45. def word_count(_file, max_to_min=False):
  46. txt = open(_file, "rU")
  47. word_dict = {}
  48. for line in txt:
  49. if line.replace(" ", "") != ("n" or None):
  50. process_line(filter(None, split("[^a-zA-Z']+", line.lower())), word_dict)
  51.  
  52. txt.close()
  53. final_list = process_dict(word_dict)
  54. format_print(final_list, max_to_min, len(word_dict))
  55.  
  56. def word_count(filename, max_to_min=False):
  57. with open(filename, "rU") as f:
  58. word_dict = {}
  59. for line in f:
  60. if line.replace(" ", "") != ("n" or None):
  61. process_line(filter(None, split("[^a-zA-Z']+", line.lower())), word_dict)
  62.  
  63. final_list = process_dict(word_dict)
  64. format_print(final_list, max_to_min, len(word_dict))
  65.  
  66. def process_line(words, word_dict):
  67. for word in words:
  68. if word in word_dict:
  69. word_dict[word] += 1
  70. else:
  71. word_dict[word] = 1
  72.  
  73. from collections import Counter
  74. .
  75. .
  76. .
  77. def word_count(filename, max_to_min=False):
  78. with open(filename, "rU") as f:
  79. counter = Counter()
  80. for line in f:
  81. if line.replace(" ", "") != ("n" or None):
  82. counter.update(filter(None, split("[^a-zA-Z']+", line.lower())))
  83.  
  84. final_list = process_dict(counter)
  85. format_print(final_list, max_to_min, len(counter))
  86.  
  87. from collections import Counter
  88. .
  89. .
  90. .
  91. def word_count(filename, max_to_min=False):
  92. with open(filename, "rU") as f:
  93. counter = Counter()
  94. for line in f:
  95. line = line.strip().lower()
  96. if not line:
  97. continue
  98. counter.update(filter(None, split("[^a-zA-Z']+", line)))
  99.  
  100. final_list = process_dict(counter)
  101. format_print(final_list, max_to_min, len(counter))
  102.  
  103. from collections import Counter
  104. .
  105. .
  106. .
  107. def word_count(filename, max_to_min=False):
  108. with open(filename, "rU") as f:
  109. counter = Counter()
  110. for line in f:
  111. line = line.strip().lower()
  112. if not line:
  113. continue
  114. counter.update(x for x in split("[^a-zA-Z']+", line) if x)
  115.  
  116. final_list = process_dict(counter)
  117. format_print(final_list, max_to_min, len(counter))
  118.  
  119. def process_dict(word_dict):
  120. temp_list = []
  121. for key, value in word_dict.items():
  122. temp_list.append((value, key))
  123.  
  124. temp_list.sort()
  125. return temp_list
  126.  
  127. def process_dict(counter):
  128. temp_list = map(lambda (a, b): (b, a), counter.items())
  129. temp_list.sort()
  130. return temp_list
  131.  
  132. from collections import Counter
  133. from re import split
  134.  
  135. BANNER = "-" * 35
  136.  
  137. def format_print(counter, is_reverse=False):
  138. lst = counter.items()
  139. lst.sort(key=lambda (a, b): (b, a), reverse=is_reverse)
  140. print ("[Unique Words: %d]" % len(lst)).center(35, "=")
  141. print "%-16s | %16s" % ("Word", "Count")
  142. print BANNER
  143. for word, count in lst:
  144. print "%-16s | %16d" % (word, count)
  145.  
  146. def count_words(filename):
  147. counter = Counter()
  148. with open(filename, "rU") as f:
  149. for line in f:
  150. line = line.strip().lower()
  151. if not line:
  152. continue
  153. counter.update(x for x in split("[^a-zA-Z']+", line) if x)
  154. return counter
  155.  
  156. format_print(count_words("Gettysburg.txt"), is_reverse=False)
  157.  
  158. with open(_file, "rU") as src:
  159. ...
  160.  
  161. inputfile=input("Enter the name (with file extension) of the file you would like to spellcheck: ")
  162. fileToCheck = open(inputfile, 'rt') #opens the file
  163. print("File found.")
  164. textToCheck=[]
  165. for line in fileToCheck:
  166. sentence=line.split() #splits it into words
  167. for word in sentence:
  168. textToCheck.append(word) #adds the rord to the list
  169. fileToCheck.close()
  170. print("File imported.")
  171. print(str(len(textToCheck))+" words found in input file.") #prints the length of the list (number of words)
  172.  
  173. import os
  174. outputText="<html>n<head>n<title>Document</title>n<style>insert stuff here</style>n<link rel="stylesheet" href="linktocss.css">n</head>n<h1>Document</h1>n<body>"
  175.  
  176. filename=inputfile+".html"
  177. outputText+="</body></html>" #finishes off html
  178. outputFile = open(filename, 'wt')
  179. outputFile.write(outputText)
  180. outputFile.close()
  181. os.startfile(filename) #automatically open file
  182.  
  183. import re
  184. from collections
  185. import Counter
  186. f=open('C:Python27myfile.txt', 'r')
  187. passage = f.read()
  188. words = re.findall(r'w+', passage)
  189. cap_words = [word.upper() for word in words]
  190. # Converting to uppercase so that 'Is' & 'is' like words should be considered as same words
  191. word_counts = Counter(cap_words)
  192. print(word_counts)
  193.  
  194. Hello
Add Comment
Please, Sign In to add comment