Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os, json, re, math, requests
- MedL = 80 # post length (in characters) above which postQuality adds a length bonus
- Nl = 0 # Grade for length
- Nm = 0 # Grade for complex words
- Np = 0 # Grade for popularity
def postLengthMed(postList):
    """Return the mean length of the entries in postList.

    An empty postList yields 0 instead of dividing by zero. The quotient is
    computed with the plain ``/`` operator, so its numeric type follows the
    running interpreter's division rules.
    """
    postCount = len(postList)
    if postCount == 0:
        return 0
    totalLength = sum(len(post) for post in postList)
    return totalLength / postCount
# Substrings counted as bad-word hits (matched case-insensitively).
# 'shitskin' also contains 'shit' and 'newfag' also contains 'fag', so those
# two words count twice; the list is kept exactly as the original had it.
_BAD_WORDS = (
    'fuck', 'shit', 'tits', 'fag', 'nigger', 'cunt', 'kike', 'kys', 'cuck',
    'pajeet', 'spic', 'retard', 'sperg', 'autist', 'shitskin', 'newfag',
    'goy',
)


def postQuality(postList, dictGrade):
    """Add a per-post quality score to the grades stored in dictGrade.

    Posts are paired by position with the keys of dictGrade taken in
    ascending order: postList[0] updates the smallest key, and so on.
    NOTE(review): this assumes postList is ordered like the sorted keys;
    confirm against the caller.

    For each post the amount added is ``round(Ni + Nl, 2)`` where

    * Ni is 1, or 0 when the post contains more than one bad-word hit;
    * Nl is ``0.25 * ln(len(post) - MedL)`` for posts longer than the
      module-level MedL, otherwise 0.

    The bad-word count is computed per post. The previous version never
    reset it inside the loop, so two hits anywhere in the list zeroed Ni
    for every later post as well.

    Args:
        postList: sequence of post texts.
        dictGrade: dict mapping a sortable key to a numeric grade; it is
            mutated in place.

    Returns:
        The same dictGrade object, updated.

    Raises:
        IndexError: if postList has more entries than dictGrade has keys.
    """
    keyList = sorted(dictGrade.keys())
    for index, post in enumerate(postList):
        postLength = len(post)
        lengthBonus = 0
        if postLength > MedL:
            # postLength - MedL is at least 1 here, so the log is never negative.
            lengthBonus = 0.25 * math.log(postLength - MedL)

        # Lower-case once, then count every bad-word substring in this post.
        lowered = post.lower()
        badWordCount = sum(lowered.count(word) for word in _BAD_WORDS)
        Ni = 0 if badWordCount > 1 else 1

        dictGrade[keyList[index]] += round(Ni + lengthBonus, 2)
    return dictGrade
def repliesGrade(parsed_json, postList):
    """Build the initial grade for the first len(postList) posts of a thread.

    Only the length of postList is used; the post data itself is read from
    ``parsed_json['posts'][i]`` (keys 'com' and 'no').

    Every post starts at grade 1. A post whose comment contains a quote link
    (``<a href="#p``) gets +0.25, plus another +1 when the text following
    the first quote link contains 'thanks' or 'thx' (case-insensitive).

    NOTE(review): the source's indentation was lost; the +0.25 reply bonus is
    assumed to apply only to posts that contain a quote link.

    Returns:
        dict mapping each post's 'no' value to its grade.
    """
    quoteMarker = '<a href="#p'
    grades = {}
    for idx in range(len(postList)):
        comment = parsed_json['posts'][idx]['com']
        postNo = parsed_json['posts'][idx]['no']
        score = 1
        if quoteMarker in comment:
            # Keep what follows the first quote link, split off the quoted id,
            # and search the string form of that two-element list.
            afterLink = comment.split(quoteMarker, 1)[1]
            flattened = str(afterLink.split('"', 1)).lower()
            if 'thanks' in flattened or 'thx' in flattened:
                score += 1
            replyCount = 1
            score += (replyCount * 0.25) % 1
        grades[postNo] = score
    return grades
def cleanPost(postList):
    """Strip HTML markup from every post in postList, in place.

    Three passes are applied to each post, in this order:

    1. delete every run of text enclosed between a '>' and the next '<'
       (this removes, for example, the visible label of a quote link);
    2. delete every remaining ``<...>`` tag;
    3. turn the HTML entity ``&#039;`` back into an apostrophe.

    NOTE(review): pass 1 also drops ordinary text that sits between two
    tags (e.g. between two ``<br>``); kept as in the original.

    The entity literal in pass 3 had been HTML-unescaped into a bare quote
    in the source, which left an unterminated string; it is restored here.

    Args:
        postList: list of post strings; its entries are replaced in place.

    Returns:
        The same list object, with cleaned entries.
    """
    for index, post in enumerate(postList):
        post = re.sub(r'>[^>]+<', '', post)
        post = re.sub(r'<[^>]+>', '', post)
        postList[index] = post.replace('&#039;', "'")
    return postList
def postArray(parsed_json):
    """Turn a thread's JSON into a list of cleaned post strings.

    Each entry has the form ``"<no>\\n<com>\\n\\n<name>"`` before being passed
    through cleanPost. Posts are inserted at the front of the list, so the
    result is in reverse order of ``parsed_json['posts']``.

    Reading stops after 300 posts, when the posts run out, or at the first
    post that lacks a 'name', 'no' or 'com' key.
    """
    collected = []
    index = 0
    while index < 300:
        try:
            author = parsed_json['posts'][index]['name']
            postNo = parsed_json['posts'][index]['no']
            body = parsed_json['posts'][index]['com']
            collected.insert(0, str(postNo) + '\n' + body + '\n' + '\n' + author)
        except (IndexError, KeyError):
            # Out of posts, or a post with a missing field: stop collecting.
            break
        index += 1
    return cleanPost(collected)
def _gradeThread(board, threadNo):
    """Fetch one thread's JSON and return the per-post grade dict for it."""
    threadUrl = "http://a.4cdn.org/" + board + "/thread/" + str(threadNo) + ".json"
    threadJson = requests.get(threadUrl).json()
    postList = postArray(threadJson)
    return postQuality(postList, repliesGrade(threadJson, postList))


def getBoardQuality(board, thread):
    """Grade the first catalog thread of board whose subject contains thread.

    Downloads the board's catalog, scans every thread of every page, and for
    the first thread whose 'sub' field contains the text ``thread`` fetches
    that thread and returns its grade dict (repliesGrade followed by
    postQuality). Threads without a 'sub' field are skipped.

    Changes from the previous version:

    * the unfiltered path called ``urllib2``, which is never imported; both
      paths now fetch through ``requests`` via _gradeThread;
    * the unfiltered path graded a post list that was never filled; it now
      grades the posts of the thread it just fetched;
    * each page's own thread list is iterated in full; before, the count was
      taken from page 0 minus one, which skipped the last thread of every
      page and overran shorter pages;
    * messages are printed with single-argument ``print(...)``, which emits
      the same text whether print is a statement or a function.

    NOTE(review): when ``thread`` is None every thread with a subject is
    fetched and graded but no aggregate is produced; the function then
    reports that no thread was found and returns 0, as the original code
    path did.

    Args:
        board: board name used in the URL, e.g. "g".
        thread: text to look for in thread subjects, or None.

    Returns:
        The grade dict of the matching thread, or 0 when no thread matched
        or grading the match failed with a KeyError.
    """
    catalog = requests.get("http://a.4cdn.org/" + board + "/catalog.json").json()
    threadFound = False
    for page in catalog:
        try:
            threads = page['threads']
        except KeyError:
            continue
        for entry in threads:
            try:
                subject = entry['sub']
                if thread is not None:
                    if subject.find(thread) == -1:
                        continue
                    print('found  ' + thread)
                    threadFound = True
                    return _gradeThread(board, entry['no'])
                _gradeThread(board, entry['no'])
            except KeyError:
                # Missing field in the catalog entry or thread data:
                # best effort, move on to the next thread.
                continue
    if not threadFound:
        print("Could not find thread!")
    return 0
- dictGrade = getBoardQuality("g", "dpt") # runs on load: grades the board "g" thread whose subject contains "dpt" (performs HTTP requests)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement