SHARE
TWEET

catch-cheaters.py

a guest Feb 17th, 2011 281 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import glob, string, os, re
  2. from htmlentitydefs import name2codepoint as n2cp
  3.  
  4. def substitute_entity(match):
  5.     ent = match.group(2)
  6.     if match.group(1) == "#":
  7.         return unichr(int(ent))
  8.     else:
  9.         cp = n2cp.get(ent)
  10.  
  11.         if cp:
  12.             return unichr(cp)
  13.         else:
  14.             return match.group()
  15.  
  16. def decode_htmlentities(string):
  17.     entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
  18.     return entity_re.subn(substitute_entity, string)[0]
  19.  
  20.  
  21. for filename in glob.glob('*.html'):
  22.     f = open(filename)
  23.     writeLines = False
  24.     count = 0
  25.     for line in f:
  26.         count = count+1
  27.         if string.find(line, 'title') != -1:
  28.             start = 14 + string.find(line, 'CodingBat Java ')
  29.             end = string.find(line, '</title>')
  30.             dir, fn = string.split(line[start:end])
  31.             if not os.path.exists(dir):
  32.                 os.makedirs(dir)
  33.             o = open(dir+"/"+fn+".txt", 'w')
  34.         if string.find(line, '</textarea>') != -1:
  35.             writeLines = False
  36.         if writeLines == True:
  37.             o.write(decode_htmlentities(line))
  38.         if string.find(line, '<textarea') != -1:
  39.             writeLines = True
  40.     o.close()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top