Advertisement
Guest User

Untitled

a guest
Dec 4th, 2016
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.67 KB | None | 0 0
  1. import os, json, re, math, requests
  2.  
  3. MedL = 80
  4. Nl = 0 #Note longueur
  5. Nm = 0 #Note mot complexe
  6. Np = 0 #Note popularite
  7.  
  8. def postLengthMed (postList):
  9. length = 0
  10. for i in range(len(postList)):
  11. length += len(postList[i])
  12. try:
  13. return length/len(postList)
  14. except ZeroDivisionError:
  15. return 0
  16.  
  17.  
  18.  
  19. def postQuality (postList, dictGrade):
  20. badWordCount = 0
  21. lengthMed = 0
  22. postLength = postLengthMed(postList)
  23.  
  24. keyList= sorted(dictGrade.keys())
  25.  
  26. for i in range(len(postList)):
  27. lengthPost = len(postList[i])
  28. Nl = 0
  29. if lengthPost > MedL:
  30. Nl = 0.25*math.log(lengthPost-MedL)
  31.  
  32. badWordCount += postList[i].lower().count('fuck')
  33. badWordCount += postList[i].lower().count('shit')
  34. badWordCount += postList[i].lower().count('tits')
  35. badWordCount += postList[i].lower().count('fag')
  36. badWordCount += postList[i].lower().count('nigger')
  37. badWordCount += postList[i].lower().count('cunt')
  38. badWordCount += postList[i].lower().count('kike')
  39. badWordCount += postList[i].lower().count('kys')
  40. badWordCount += postList[i].lower().count('cuck')
  41. badWordCount += postList[i].lower().count('pajeet')
  42. badWordCount += postList[i].lower().count('spic')
  43. badWordCount += postList[i].lower().count('retard')
  44. badWordCount += postList[i].lower().count('sperg')
  45. badWordCount += postList[i].lower().count('autist')
  46. badWordCount += postList[i].lower().count('shitskin')
  47. badWordCount += postList[i].lower().count('newfag')
  48. badWordCount += postList[i].lower().count('goy')
  49.  
  50. if badWordCount > 1:
  51. Ni = 0
  52. else:
  53. Ni = 1
  54. dictGrade[keyList[i]] += round(Ni+Nl, 2)
  55. return dictGrade
  56.  
  57.  
  58.  
  59. def repliesGrade(parsed_json, postList):
  60. numReplies = []
  61. dictGrade = {}
  62. for i in range(len(postList)):
  63. Grade = 1
  64. replies = 0
  65. string = parsed_json['posts'][i]['com']
  66. no = parsed_json['posts'][i]['no']
  67. if string.count('<a href="#p')!= 0:
  68. string = string.split('<a href="#p', 1)
  69. string = string[1].split('"', 1)
  70. string = str(string)
  71. if string.lower().count('thanks')!= 0 or string.lower().count('thx') != 0:
  72. Grade += 1
  73. replies += 1
  74. Grade += (replies*0.25)%1
  75. dictGrade[no] = Grade
  76. return dictGrade
  77.  
  78.  
  79.  
  80.  
  81. def cleanPost(postList):
  82. for i in range(len(postList)):
  83.  
  84. if (postList[i].count('<a href="#p') != 0):
  85. string = postList[i].split('<a href="#p', 1)
  86. string = string[1].split('"', 1)
  87.  
  88. postList[i]= re.sub('>[^>]+<','', postList[i])
  89. postList[i]= re.sub('<[^>]+>','', postList[i])
  90. postList[i]= re.sub('&#039;',"'", postList[i])
  91. return postList
  92.  
  93.  
  94. def postArray(parsed_json):
  95. postList=[]
  96. for i in range(300):
  97. post=''
  98. try:
  99. name = parsed_json['posts'][i]['name']
  100. number = parsed_json['posts'][i]['no']
  101.  
  102. comment = parsed_json['posts'][i]['com']
  103. post += str(number)+'\n'+comment+'\n'+'\n'+name
  104. postList.insert(0, post)
  105. except IndexError:
  106. break
  107. except KeyError:
  108. break
  109.  
  110. postList = cleanPost(postList)
  111.  
  112. return postList
  113.  
  114. def getBoardQuality(board, thread):
  115. postList=[]
  116. dictGrade = {}
  117.  
  118. medInsult = 0
  119. Insults = 0
  120. percent = 0
  121. postLength = 0
  122. threadFound = 1
  123.  
  124. response = requests.get("http://a.4cdn.org/"+board+"/catalog.json")
  125. parsed_json = response.json()
  126.  
  127. nmbPages = len(parsed_json)
  128. threadspPages = len(parsed_json[0]['threads'])-1
  129. for i in parsed_json:
  130. try:
  131. for j in range(threadspPages):
  132. Insults = 0
  133. percent += 1
  134.  
  135. try:
  136. string = i['threads'][j]['sub']
  137. if thread != None and string.find(thread) != -1:
  138. print 'found ', thread
  139.  
  140. threadFound = 0
  141.  
  142. DPTUrl = "http://a.4cdn.org/"+board+"/thread/"
  143. DPTUrl += str(i['threads'][j]['no'])+".json"
  144.  
  145. response = requests.get(DPTUrl)
  146. parsed_json = response.json()
  147.  
  148. postList = postArray(parsed_json)
  149.  
  150. dictGrade = repliesGrade(parsed_json, postList)
  151. dictGrade = postQuality(postList, dictGrade)
  152.  
  153. return dictGrade
  154. elif thread != None:
  155. continue
  156.  
  157. DPTUrl = "http://a.4cdn.org/"+board+"/thread/"
  158. DPTUrl += str(i['threads'][j]['no'])+".json"
  159. response = urllib2.urlopen(DPTUrl)
  160. html = response.read()
  161. parsed_json = json.loads(html)
  162.  
  163. dictGrade = repliesGrade(parsed_json, postList)
  164. dictGrade = postQuality(postList, dictGrade)
  165.  
  166. except KeyError:
  167. continue
  168.  
  169.  
  170. except IndexError:
  171. print "Index Error"
  172. pass
  173.  
  174. if threadFound == 1:
  175. print "Could not find thread!"
  176. return 0
  177.  
  178. return postLength/(threadspPages*nmbPages)
  179.  
  180.  
  181. dictGrade = getBoardQuality("g", "dpt")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement