Advertisement
Guest User

Untitled

a guest
Jun 16th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.24 KB | None | 0 0
  1. #!/usr/bin/python
  2. #-*- coding: utf-8 -*-
  3.  
  4. import math
  5. import os
  6. import argparse
  7. import commands
  8. import requests
  9. import re
  10. from bs4 import BeautifulSoup
  11. from flask import Flask, jsonify, request, redirect, url_for, render_template
  12. from werkzeug import secure_filename
  13. from nltk.corpus import stopwords
  14. from nltk.tokenize import word_tokenize
  15.  
  16. word_d = {}
  17. sent_list = []
  18.  
  19. def cleanStr(data):
  20. cleanr = re.compile('<.*?>')
  21. cleanText = re.sub(cleanr,'',data)
  22. cleanText = re.sub('[:&\;/.,")(]','',cleanText)
  23.  
  24. return cleanText
  25.  
  26. def process_new_sentence(s):
  27. sent_list.append(s)
  28. tokenized = word_tokenize(s.lower())
  29. for word in tokenized:
  30. if word in stopwords.words('english'):
  31. continue
  32. if word not in word_d.keys():
  33. word_d[word]=0
  34. word_d[word] += 1
  35.  
  36.  
  37. def compute_tf(s):
  38. bow = set()
  39. wordcount_d = {}
  40. tokenized = word_tokenize(s)
  41. for tok in tokenized:
  42. if tok in stopwords.words('english'):
  43. continue
  44. if tok not in wordcount_d.keys():
  45. wordcount_d[tok]=0
  46. wordcount_d[tok] += 1
  47. bow.add(tok)
  48. tf_d = {}
  49. for word, tfval in wordcount_d.iteritems():
  50. tf_d[word] = float(tfval) / len(bow)
  51.  
  52. return tf_d
  53.  
  54.  
  55. def compute_idf():
  56. Dval = len(sent_list)
  57. # build set of words
  58. bow = set()
  59. for i in range(0,len(sent_list)):
  60. tokenized = word_tokenize(sent_list[i])
  61. for tok in tokenized:
  62. if tok in stopwords.words('english'):
  63. continue
  64. bow.add(tok)
  65. idf_d = {}
  66. for t in bow:
  67. cnt = 0
  68. for s in sent_list:
  69. if t in word_tokenize(s):
  70. cnt += 1
  71. idf_d[t] = math.log10(float(len(sent_list))/cnt)
  72.  
  73. return idf_d
  74.  
  75. UPLOAD_FOLDER="path/to/the/uploads"
  76. ALLOWED_EXTENSIONS=set(['txt'])
  77.  
  78. app=Flask(__name__)
  79. app.config['UPLOAD_FOLDER']=UPLOAD_FOLDER
  80.  
  81. def allowed_file(filename):
  82. return '.' in filename and \
  83. filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS
  84.  
  85. @app.route('/', methods=['GET'])
  86. #Here there is no function in main page :D
  87. def main_page():
  88.  
  89. return render_template('main_page.html')
  90.  
  91.  
  92. @app.route('/url_checker1', methods=['POST', 'GET'])
  93. def checker1():
  94. if request.method=='POST':
  95. url=request.form['input']
  96. res=requests.get(url)
  97. soup=BeautifulSoup(res.content, 'html.parser')
  98. price=soup.find(id='qwidget_lastsale').text
  99. price = cleanStr(price)
  100.  
  101. result = {}
  102.  
  103. for sent in price.split('\n'):
  104. process_new_sentence(sent)
  105.  
  106. idf_d = compute_idf()
  107. for i in range(0,len(sent_list)):
  108. tf_d = compute_tf(sent_list[i])
  109. for word,tfval in tf_d.iteritems():
  110. if word in result.keys():
  111. if result[word] < tfval*idf_d[word]:
  112. result[word] = tfval*idf_d[word]
  113. else:
  114. result[word] = tfval*idf_d[word]
  115.  
  116. Last = sorted(result.items(), key = lambda x: x[1], reverse = True)
  117.  
  118. cnt = 0
  119.  
  120. for i in wordL:
  121. print "%-15s%10d" % (i[0], i[1])
  122. cnt += 1
  123. if cnt == 10:
  124. break
  125.  
  126.  
  127. return render_template('checker1.html',price=price)
  128.  
  129.  
  130. @app.route('/url_checker2', methods=['POST', 'GET'])
  131. def checker2():
  132. if request.method == 'POST':
  133. f = request.files['file']
  134. f.save(secure_filename(f.filename))
  135. contents = ""
  136. with open(f.filename, 'r') as f:
  137. content = f.readlines()
  138. content = [x.strip() for x in content]
  139. return content[0]
  140.  
  141.  
  142. if __name__=='__main__':
  143. try:
  144. parser=argparse.ArgumentParser(description="")
  145. parser.add_argument('--listen-port', type=str, required=True, help='REST service listen port')
  146. args=parser.parse_args()
  147. listen_port=args.listen_port
  148. except Exception, e:
  149. print('Error: %s' % str(e))
  150.  
  151. ipaddr=commands.getoutput("hostname -I").split()[0]
  152. print "Starting the service with ip_addr"+ipaddr
  153. app.run(debug=False, host=ipaddr, port=int(listen_port))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement