Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #Pixiv search URL: http://www.pixiv.net/search.php?s_mode=s_tag&word=<search term, UTF-8>
- #The list of tit tags is at: http://dic.pixiv.net/a/%e6%9d%b1%e6%96%b9%e3%81%8a%e3%81%a3%e3%81%b1%e3%81%84%e3%82%bf%e3%82%b0%e4%b8%80%e8%a6%a7
- #Result count is contained in: <span class="count-badge">500results</span>
- import sys
- import traceback
- import re
- import os
- import cookielib
- import urllib, urllib2, time
- from datetime import date, timedelta
# Globals
# A single cookie jar backs the shared opener, so cookies received by one
# request made through `opener` are sent with the following ones.
jar = cookielib.CookieJar()
_cookie_handler = urllib2.HTTPCookieProcessor(jar)
opener = urllib2.build_opener(_cookie_handler)
def load_characters():
    """Load the character table from ``characters.txt``.

    Every non-blank line of the file holds three tab-separated, Shift-JIS
    encoded fields, in this order: the English name, the Japanese tag and
    the character's "tits" tag (big tits, not flat chest). Fields past the
    third are ignored.

    Returns:
        A dict keyed by English name. Each value is a dict with the keys
        'English', 'Japanese' and 'Tits', all holding unicode text.

    Raises:
        ValueError: if a non-blank line has fewer than three fields.
    """
    # Function-scope import keeps this block self-contained; io.open decodes
    # while reading, so the text is converted once at the I/O boundary.
    import io

    chars = {}
    # The context manager closes the file even when a line fails to parse.
    with io.open('characters.txt', 'r', encoding='sjis') as infile:
        for lineno, line in enumerate(infile, 1):
            stripped = line.strip()
            if not stripped:
                # Skip empty and whitespace-only lines.
                continue
            fields = [field.strip() for field in stripped.split(u'\t')]
            if len(fields) < 3:
                raise ValueError(
                    "characters.txt line %d: expected 3 tab-separated "
                    "fields, found %d" % (lineno, len(fields)))
            eng, jap, tits = fields[:3]
            # The English name (first field) is the lookup key.
            chars[eng] = {'English': eng, 'Japanese': jap, 'Tits': tits}
    return chars
def do_search(keyword, r18=False, recursion=0):
    """Return the number of Pixiv tag-search results for ``keyword``.

    The Touhou tag is appended to every search automatically. If the
    returned page looks like the logged-out page, do_login() is called and
    the search is retried.

    Args:
        keyword: unicode text of what to search for. It may already contain
            percent-escapes such as '%20' between tags; existing '%'
            characters are left untouched.
        r18: boolean; when true the "r-18" tag is added to the search.
        recursion: number of login retries made so far (internal use).

    Returns:
        The result count shown in the page's count badge, as an int.

    Raises:
        Exception: if the search still needs a login after the allowed
            retries, or if the page contains no result count.
    """
    if recursion > 3:
        raise Exception("Could not authenticate.")

    touhou = u'\u6771\u65b9'
    # Percent-encode the UTF-8 bytes so the URL is plain ASCII. '%' is kept
    # safe because callers join several tags with a literal '%20'.
    url = 'http://www.pixiv.net/search.php?s_mode=s_tag&word='
    url += urllib.quote(keyword.encode('utf-8'), safe='%')
    url += '%20' + urllib.quote(touhou.encode('utf-8'))
    if r18:
        # Searching for the "r-18" tag; '&r18=1' was the considered alternative.
        url += '%20r-18'

    response = opener.open(urllib2.Request(url))
    try:
        text = response.read()
    finally:
        response.close()

    # Any of these markers means the logged-out page was served.
    login_markers = (
        '<a class="signup_button"',
        '<div class="mail-signup"',
        'register-introduction-modal',
    )
    if any(marker in text for marker in login_markers):
        # Quickest way to retry the search with the login.
        do_login()
        return do_search(keyword, r18, recursion + 1)

    m = re.search(r'<span class="count-badge">([0-9]+)results</span>', text)
    if m is None:
        # Without this check a missing badge surfaces as an opaque
        # AttributeError on None.
        raise Exception("Could not find the result count for: " + url)
    return int(m.group(1))
def do_login():
    """Prompt for Pixiv credentials and post them through the shared opener.

    The login form is submitted with the module-level ``opener``, so any
    cookies the server sets are kept in its cookie jar for later requests.
    The response is not inspected; callers decide whether the login worked.

    Raises:
        Exception: if the username or the password is left empty.
    """
    # Function-scope import keeps this block self-contained.
    import getpass

    username = raw_input('Pixiv username: ').strip()
    # getpass keeps the password from being echoed to the terminal.
    password = getpass.getpass('Pixiv password: ').strip()
    if not username or not password:
        raise Exception("Authentication aborted.")

    postdata = {'mode': 'login', 'pixiv_id': username, 'pass': password, 'skip': 1}
    # NOTE(review): the credentials are posted over plain HTTP.
    response = opener.open('http://www.pixiv.net/login.php', urllib.urlencode(postdata))
    try:
        # Drain the body; its content is not used.
        response.read()
    finally:
        response.close()
def _percent(part, whole):
    """Format part/whole as a percentage with two decimals, or "NaN" if whole is 0."""
    if whole == 0:
        return "NaN"
    return "%.2f" % (float(part) / float(whole) * 100)


futastr = u'\u3075\u305f\u306a\u308a'
# Kanji for milk, which means tits. This one had more results than "Oppai".
titsstr = u'\u4e73'

# The context manager closes results.csv even if a search or login fails.
with open('results.csv', 'w') as outfile:
    outfile.write("Character,Japanese tag,results,ero,Tits tag,results,ero,futa,normal:lewd,normal:tits,tits:lewdtits,ero:futa\n")

    charas = load_characters()
    # Sorted so the rows come out in the same order on every run.
    for cx in sorted(charas):
        c = charas[cx]
        normal = do_search(c['Japanese'])
        ero = do_search(c['Japanese'], True)
        tits = do_search(c['Tits'])
        erotits = do_search(c['Tits'], True)
        # R-18 is probably redundant, but anything that isn't R-18 probably
        # shouldn't be counted. '%20' is the pre-escaped space between tags.
        futaresults = do_search(futastr + '%20' + c['Japanese'], True)

        lewd = _percent(ero, normal)
        row = [
            c['English'].encode('sjis'),
            c['Japanese'].encode('sjis'),
            str(normal),
            str(ero),
            c['Tits'].encode('sjis'),
            str(tits),
            str(erotits),
            str(futaresults),
            lewd,                           # Normal : Ero percentage
            _percent(tits, normal),         # Normal : Tits percentage
            _percent(erotits, tits),        # Tits : Erotits percentage
            _percent(futaresults, ero),     # Ero : Futa percentage
        ]
        outfile.write(",".join(row) + "\n")
        print("%s is %s%% lewd." % (c['English'], lewd))

    # Baseline row: the same searches without any character tag.
    normal = do_search('')                  # All touhou images
    ero = do_search('', True)               # All r-18 touhou images
    tits = do_search(titsstr)
    erotits = do_search(titsstr, True)      # Ero milk
    futagirls = do_search(futastr, True)    # R-18 probably redundant, as above

    row = [
        'Baseline',
        'Baseline',
        str(normal),
        str(ero),
        titsstr.encode('sjis'),
        str(tits),
        str(erotits),
        str(futagirls),
        _percent(ero, normal),              # Normal : Ero percentage
        _percent(tits, normal),             # Normal : Tits percentage
        _percent(erotits, tits),            # Tits : Erotits percentage
        _percent(futagirls, ero),           # Ero : Futa percentage
    ]
    # End the row with a newline so it has the same 12 columns as the header.
    outfile.write(",".join(row) + "\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement