# avafavico GAE app, new version with python27 and lxml

#!/usr/bin/env python
#
# "blogava" - blogger avatar fetch, and gradient data URI generator
# v1 2.11.2011
# v2 ..
# v3 11.11.2011
# v4 14.11.2011
#    ?c1=f8dd99&c2=eeaa00&w=1&h=20 create gradient png
#    optional: ?download or ?js[=variablename]
# v4.01 14.12.2011 -
#    blogger new profile page default profile without server-part,
#    added code to deal with that
# v4.02 18.12.2011 -
#    increased timeout 5->15 in blogger profile pages, handle downloaderrors
# v5 11.3.2012 -
#    now uses python27 and lxml/etree/xpath to parse profile pages (which is
#    much better than regular expressions)
#
# -----------------------------------------------------------------------------
#
# fetch profile image from blogger user id. usage:
# http://avafavico.appspot.com/?userid=01234567890
#
# See http://yabtb.blogspot.com/2011/11/google-app-engine-python-application.html
#
# This is like my first python app, so it may not be too shiny... but it works
#
#   - MS-potilas
#

import sys
import cgi
import re
import base64
from google.appengine.api import images
from google.appengine.api import urlfetch
from google.appengine.api import memcache
from google.appengine.runtime import DeadlineExceededError
from lxml import etree

#
# png & gradient code based on:
# http://jtauber.com/blog/2008/05/18/creating_gradients_programmatically_in_python/
#

#########################################

def make_png(width, height, rgba_func):
  """Render an 8-bit PNG by sampling rgba_func and return the encoded bytes.

  rgba_func(x, y) is called with fractional coordinates i/float(n) (so each
  lies in [0, 1)) and must return an iterable of colour components in the
  0..1 range; every component is scaled by 255 and rounded. Four components
  select PNG colour type 6 (RGBA), anything else colour type 2 (RGB).

  When height < width the pixel data is generated with the two dimensions
  swapped and the image is then rotated by 270 degrees; otherwise it is
  rotated by 270 and then 90 degrees (a full turn). The final bytes come from
  the App Engine images service (execute_transforms with PNG output).
  """
  import zlib
  import struct
  import array

  swapped = height < width
  if swapped:
    width, height = height, width

  def fractions(count):
    # 0/count, 1/count, ... (count-1)/count
    return (step / float(count) for step in xrange(count))

  def chunk(tag, payload):
    # PNG chunk layout: big-endian length, type, data, CRC32 over type+data.
    crc = zlib.crc32(payload, zlib.crc32(tag)) & 0xFFFFFFFF
    return struct.pack('!I', len(payload)) + tag + payload + struct.pack('!I', crc)

  def pixel_data():
    raw = array.array('B')
    for y in fractions(height):
      raw.append(0)  # every row starts with a zero byte (PNG filter type "none")
      for x in fractions(width):
        for component in rgba_func(x, y):
          raw.append(int(round(component * 255)))
    return zlib.compress(raw)

  # Probe one sample to decide between RGBA (6) and RGB (2).
  if len(list(rgba_func(0,0))) == 4:
    color_type = 6
  else:
    color_type = 2

  png = ''.join([
    '\x89PNG\r\n\x1a\n',  # PNG signature
    chunk('IHDR', struct.pack('!2I5B', width, height, 8, color_type, 0, 0, 0)),
    chunk('IDAT', pixel_data()),
    chunk('IEND', ''),
  ])

  picture = images.Image(png)
  picture.rotate(270)
  if not swapped:
    # Dimensions were not swapped, so complete the full circle.
    # NOTE(review): presumably this also guarantees execute_transforms has a
    # transform queued -- confirm against the images API.
    picture.rotate(90)
  return picture.execute_transforms(output_encoding=images.PNG)

def linear_gradient(start_value, stop_value, start_offset=0.0, stop_offset=1.0):
  """Return a function mapping an offset to an interpolated channel value.

  The returned function interpolates linearly from start_value (at
  start_offset) to stop_value (at stop_offset) and divides the result by
  255.0, so 0..255 channel values come back in the 0..1 range.

  Calling the returned function raises ZeroDivisionError when start_offset
  equals stop_offset.
  """
  def interpolate(offset):
    # float() prevents Python 2 integer division from truncating the fraction
    # to 0 when the offsets are all ints.
    fraction = (offset - start_offset) / float(stop_offset - start_offset)
    return (start_value + fraction * (stop_value - start_value)) / 255.0
  return interpolate

def gradient(segments):
  """Build a colour function of (x, y) from a list of gradient segments.

  segments is an iterable of (segment_end, start, end) tuples, where start
  and end are equal-length colour tuples. The first segment begins at 0.0 and
  each later one begins where the previous one ended.

  The returned function ignores x. For the first segment whose segment_end is
  greater than y it returns a generator with one linear_gradient value per
  colour channel; if y is not below any segment_end it returns None.
  """
  def gradient_function(x, y):
    lower = 0.0
    for upper, first_color, last_color in segments:
      if not y < upper:
        # y lies beyond this segment; the next one starts where this ends.
        lower = upper
        continue
      channels = xrange(len(first_color))
      return (linear_gradient(first_color[c], last_color[c], lower, upper)(y) for c in channels)
  return gradient_function

#########################################

def getFavico(domain):
  """Return the favicon bytes for domain, or None if it cannot be fetched.

  The memcache entry keyed by domain is consulted first. On a miss the icon
  is downloaded from Google's s2/favicons service and, when the download
  succeeded, cached for 14400 seconds (4 hours).
  """
  icon = memcache.get(key=domain)
  if icon is not None:
    return icon

  icon = fetchUrl("http://www.google.com/s2/favicons?domain=" + domain)
  if icon is not None:
    # Only cache real data: a stored None cannot be told apart from a cache
    # miss on the next lookup, so writing it would just waste a memcache call.
    memcache.add(key=domain, value=icon, time=14400)
  return icon

def fetchUrl(url):
  """Fetch url and return the response body, or None on any failure.

  Scheme-relative URLs ("//host/...") are fetched over http. Blogger profile
  pages get a 15 second deadline, everything else 5 seconds. None is returned
  when the fetch raises a urlfetch error or the status code is not 200.
  """
  url = re.sub(r"^//", "http://", url)

  deadline = 5
  if re.match(r"http://www\.blogger\.com/profile/.+", url):
    deadline = 15  # profile pages can be slow to respond

  try:
    response = urlfetch.fetch(url, deadline=deadline)
  except urlfetch.Error:
    # urlfetch.Error is the base class of DownloadError, InvalidURLError,
    # ResponseTooLargeError etc. URLs scraped from profile pages are not
    # trustworthy, so any of these is treated as "could not fetch".
    return None

  if response.status_code == 200:
    return response.content
  return None

#########################################

def main():
  form = cgi.FieldStorage(keep_blank_values = True)
  c1 = form.getfirst("c1")
  c2 = form.getfirst("c2")
  w = form.getfirst("w")
  h = form.getfirst("h")

  if w is None or not re.match('\d+$', w):
    w = 0
  if h is None or not re.match('\d+$', h):
    h = 0
  if int(w)*int(h) > 4096:
    print "Status: 400 Bad Request"
    print
    print "dimensions (w x h) too large"
    return 1

  if c1 is not None and c2 is not None:
    if re.match('[a-f0-9]{6}$', c1, re.I) and re.match('[a-f0-9]{6}$', c2, re.I) and int(w) > 0 and int(h) > 0:
      c1r = int(c1[:2],16)
      c1g = int(c1[2:][:2],16)
      c1b = int(c1[-2:],16)
      c2r = int(c2[:2],16)
      c2g = int(c2[2:][:2],16)
      c2b = int(c2[-2:],16)
    else:
      print "Status: 400 Bad Request"
      print
      print "invalid parameters"
      return 1

    img = make_png(int(w), int(h), gradient([ (1.0, (c1r, c1g, c1b), (c2r, c2g, c2b)), ]))

    if form.getfirst("download") is not None:
      print "Content-Type: image/png"
      print "Cache-Control: public, max-age=86400"
      print
      print img
      return 0
    imgenc = base64.b64encode(img)
    if form.getfirst("js") is not None:
      var = form.getfirst("js")
      if not re.match('[a-zA-Z]+$', var):
        var = "result"

      print "Content-Type: text/javascript"
      print "Cache-Control: public, max-age=86400"
      print
      print 'var ' + var + '="'+imgenc+'";'
      return 0

    imgenc = 'data:image/png;base64,' + imgenc
    print "Content-Type: text/html"
    print "Cache-Control: public, max-age=86400"
    print
    print '<html><head><link rel="shortcut icon" href="/favicon.ico" /><title>Gradient PNG data URI generator</title></head><body style="text-align:center;">'
    print 'Gradient from #' + c1 + ' to #' + c2 + ', width='+w+', height='+h+':<br />'
    print '<textarea id="pngarea" rows="' + str(round(len(imgenc)/64+2))+ '" cols="64">'+imgenc+'</textarea>'
    print '<script>document.getElementById("pngarea").focus();document.getElementById("pngarea").select();</script>'
    print '<div style="margin:10px"><a title="Click to open data URI" target="_top" href="'+imgenc+'"><img src="'+imgenc+'" style="padding:10px;border:1px solid #d0d0d0;" /></a></div>'
    print 'Use it for example like this in CSS:<br /><div style="margin-left:auto;margin-right:auto;width:520px;text-align:left;font-family:monospace;word-wrap:break-word;font-size:90%;margin-bottom:12px;">'
    print 'background:url('+imgenc+') 100% 100%;'
    print '</div>'
    print '<hr />'
    print '<div style="font-size:85%">by MS-potilas 2011-2012, see <a target="_top" href="http://yabtb.blogspot.com/2011/11/gradient-png-data-uri-maker-reference.html">yabtb.blogspot.com</a>.</div>'
    print '</body></html>'
    return 0

  userid = form.getfirst("userid")

  if userid is None:
    print "Content-Type: text/html"
    print "Cache-Control: public, max-age=14400"
    print
    print '<html><head><link rel="shortcut icon" href="/favicon.ico" /><title>Fetch Blogger Avatar</title></head><body style="text-align:center;">Application to return small icon from Blogger profile. Usage: ?userid=USERID.<br /><hr /><div style="font-size:85%">by MS-potilas 2011-2012, see <a href="http://yabtb.blogspot.com/2011/11/python-tool-to-get-blogger-avatar.html">yabtb.blogspot.com</a>.</div><br />'
    print '</body></html>'
    return 0

  if not re.match('\d+$', userid):
    print "Status: 400 Bad Request"
    print
    print "invalid userid"
    return 1

  # first check cache
  thedata = memcache.get(key=userid)
  if thedata is None:
    domain = "www.blogger.com" # fallback favico
    url = "http://www.blogger.com/profile/"+userid
    result = fetchUrl(url)
    if result is not None:
      tree = etree.HTML(result)
      r=tree.xpath("//img[@id='profile-photo']/@src")
      if len(r) == 0:
        r=tree.xpath("//img[@class and contains(concat(' ',normalize-space(@class),' '),' photo ')]/@src")
      if len(r) > 0:
        found = r[0]
      # if profile photo not found, search person's first blog's address (rel="contributor-to")
      if found is None:
        r=tree.xpath("//a[@rel and contains(concat(' ',normalize-space(@rel),' '),' contributor-to ')]/@href")
        if len(r) > 0:
          found = r[0]
        if found is not None:
          domain = re.search('(http://){0,1}(.+?)[$/]', found).group(2)
          found = None
      if found is None:
        result = getFavico(domain)
      else:
        found = re.sub('^//',"http://", found)
        if not re.match('http', found):
          found = "http://www.blogger.com" + found
        result = fetchUrl(found)
    # if loading has failed, fallback to domain's favico
    if result is None:
      result = getFavico(domain)
    if result is not None:
      img = images.Image(result)

      if img.width > img.height*1.67:
        img.crop(0.2,0.0,0.8,1.0)
      elif img.height > img.width*1.67:
        img.crop(0.0,0.2,1.0,0.8)
      elif img.width > img.height*1.25:
        img.crop(0.1,0.0,0.9,1.0)
      elif img.height > img.width*1.25:
        img.crop(0.0,0.1,1.0,0.9)

      img.resize(32,32)
      thedata=img.execute_transforms(output_encoding=images.PNG)
      memcache.add(key=userid, value=thedata, time=14400)
    else:
      print "Status: 404 Not Found"
      print
      print "not found"
      return 1 #all fetches failed

  print "Content-Type: image/png"
  print "Cache-Control: public, max-age=14400"
  print
  print thedata
  return 0

# Handle the request when this file is executed as a script. The status value
# returned by main() is not used here.
if __name__ == '__main__':
  main()