Advertisement
qazmlpok

ero analysis.py

Mar 8th, 2013
173
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.57 KB | None | 0 0
  1. #Pixiv search URL: http://www.pixiv.net/search.php?s_mode=s_tag&word=<search term, UTF-8>
  2. #The list of tit tags is at:   http://dic.pixiv.net/a/%e6%9d%b1%e6%96%b9%e3%81%8a%e3%81%a3%e3%81%b1%e3%81%84%e3%82%bf%e3%82%b0%e4%b8%80%e8%a6%a7
  3. #Result count is contained in: <span class="count-badge">500results</span>
  4.  
  5. import sys
  6. import traceback
  7. import re
  8. import os
  9. import cookielib
  10. import urllib, urllib2, time
  11. from datetime import date, timedelta
  12.  
  13. #Globals
  14. jar = cookielib.CookieJar()
  15. opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
  16.  
  17. def load_characters():
  18.     infile = open('characters.txt', 'r')
  19.  
  20.     chars = {}
  21.  
  22.     for line in infile:
  23.         if line == '\n':
  24.             continue
  25.  
  26.         data = line.strip().split('\t')
  27.  
  28.         eng = unicode(data[0].strip(), 'sjis')
  29.         jap = unicode(data[1].strip(), 'sjis')
  30.         tits = unicode(data[2].strip(), 'sjis')     # Big tits. Not flat chest.
  31.  
  32.         #First line is English name.
  33.         chars[eng] = {'English':eng, 'Japanese':jap, 'Tits':tits}
  34.  
  35.     infile.close()
  36.  
  37.     return chars
  38.  
  39.  
  40. #r18 is a boolean.
  41. #keyword should be UNICODE text of what to search for.
  42. #Touhou is added automatically.
  43. def do_search(keyword, r18=False, recursion=0):
  44.     global opener
  45.  
  46.     if recursion > 3:
  47.         raise Exception("Could not authenticate.")
  48.  
  49.     url = 'http://www.pixiv.net/search.php?s_mode=s_tag&word=' + keyword.encode('utf-8')
  50.  
  51.     touhou = u'\u6771\u65b9'
  52.  
  53.     url += '%20' + touhou.encode('utf-8')
  54.  
  55.     if r18:
  56.         #url += '&r18=1'            #I'm going to assume that this is somehow more reliable than searching for "r-18"
  57.         url += '%20r-18'
  58.  
  59.     #print "Opening:", url.decode('utf-8').encode('sjis')           #At least on my comp, utf-8 does not display in the console. So this is pointless to print.
  60.     request = urllib2.Request(url)
  61.     response = opener.open(request)
  62.     text = response.read()
  63.  
  64.     #print text
  65.  
  66.     if text.find('<a class="signup_button"') != -1 or text.find('<div class="mail-signup"') != -1 or text.find('register-introduction-modal') != -1:
  67.         #print 'trying login...'
  68.         #Quickest way to retry search with the login.
  69.         do_login()
  70.         return do_search(keyword, r18, recursion+1)
  71.  
  72.     #print text
  73.     #exit()
  74.  
  75.     m = re.search('<span class="count-badge">([0-9]+)results</span>', text)
  76.     results = int(m.group(1))
  77.  
  78.     return results
  79.  
  80. def do_login():
  81.     global opener
  82.  
  83.     username = raw_input('Pixiv username: ').strip()
  84.     password = raw_input('Pixiv password: ').strip()
  85.  
  86.     if username == '' or password == '':
  87.         raise Exception("Authentication aborted.")
  88.  
  89.     postdata = {'mode': 'login', 'pixiv_id': username, 'pass' : password, 'skip' : 1}
  90.  
  91.     response = opener.open('http://www.pixiv.net/login.php', urllib.urlencode(postdata))
  92.     text = response.read()
  93.  
  94. outfile = open('results.csv', 'w')
  95. outfile.write("Character,Japanese tag,results,ero,Tits tag,results,ero,futa,normal:lewd,normal:tits,tits:lewdtits,ero:futa\n")
  96.  
  97. futastr = u'\u3075\u305f\u306a\u308a'
  98.  
  99. charas = load_characters()
  100. for cx in charas:
  101.     c = charas[cx]
  102.  
  103.     normal = do_search(c['Japanese'])
  104.     #print 'Regular search for', c['English'], normal
  105.  
  106.     ero = do_search(c['Japanese'], True)
  107.     #print 'Ero search for', c['English'], ero
  108.  
  109.     tits = do_search(c['Tits'])
  110.     #print 'Tits search for', c['English'], tits
  111.  
  112.     erotits = do_search(c['Tits'], True)
  113.     #print 'Ero tits search for', c['English'], erotits
  114.  
  115.     futaresults = do_search(futastr + '%20' + c['Japanese'] , True) # R-18 is probably redundant, but whatever. If anything isn't R-18 it probably shouldn't be counted.
  116.  
  117.     #break
  118.     #Write results
  119.     #outfile.write(c['English'] + "," + c['Japanese'].encode('sjis') + "," + str(normal) + "," + str(ero) + ",")
  120.     outfile.write(c['English'])
  121.     outfile.write(",")
  122.     outfile.write(c['Japanese'].encode('sjis'))
  123.     outfile.write(",")
  124.     outfile.write(str(normal))
  125.     outfile.write(",")
  126.     outfile.write(str(ero))
  127.     outfile.write(",")
  128.  
  129.     #outfile.write(c['Tits'].encode('sjis') + "," + str(tits) + "," + str(erotits) + ",")
  130.     outfile.write(c['Tits'].encode('sjis'))
  131.     outfile.write(",")
  132.     outfile.write(str(tits))
  133.     outfile.write(",")
  134.     outfile.write(str(erotits))
  135.     outfile.write(",")
  136.  
  137.     outfile.write(str(futaresults))
  138.     outfile.write(",")
  139.  
  140.     outfile.write("%.2f" % (float(ero) / float(normal) * 100) + ",")    # Normal : Ero percentage
  141.     outfile.write("%.2f" % (float(tits) / float(normal) * 100) + ",")   # Normal : Tits percentage
  142.     if (tits == 0):
  143.         outfile.write("NaN,")
  144.     else:
  145.         outfile.write("%.2f" % (float(erotits) / float(tits) * 100) + ",")  # Tits : Erotits percentage
  146.     outfile.write("%.2f" % (float(futaresults) / float(ero) * 100) + "\n")  # Ero : Futa percentage
  147.  
  148.     print c['English'], 'is', "%.2f%%" % (float(ero) / float(normal) * 100), 'lewd.'
  149.  
  150. titsstr = u'\u4e73'
  151.  
  152. normal = do_search('')      #All touhou images
  153. ero = do_search('', True)   #All r-18 touhou images
  154. tits = do_search(titsstr)       #Kanji for milk, which means tits. This one had more results than "Oppai"
  155. erotits = do_search(titsstr, True)  #Ero milk
  156. futagirls = do_search(futastr, True)    # R-18 is probably redundant, but whatever. If anything isn't R-18 it probably shouldn't be counted.
  157.  
  158. #Write results
  159. outfile.write('Baseline' + "," + 'Baseline' + "," + str(normal) + "," + str(ero) + ",")
  160. outfile.write(titsstr.encode('sjis'))
  161. outfile.write("," + str(tits) + "," + str(erotits) + "," + str(futagirls) + ",")
  162.  
  163. outfile.write("%.2f" % (float(ero) / float(normal) * 100) + ",")    # Normal : Ero percentage
  164. outfile.write("%.2f" % (float(tits) / float(normal) * 100) + ",")   # Normal : Tits percentage
  165. outfile.write("%.2f" % (float(erotits) / float(tits) * 100) + ",")  # Tits : Erotits percentage
  166. outfile.write("%.2f" % (float(futagirls) / float(ero) * 100) + ",") # Tits : Erotits percentage
  167.  
  168. outfile.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement