# Export Android app reviews from Google Play as a JSON list printed to stdout.

from HTMLParser import HTMLParser
import json, sys, re
import requests # requires pip install requests

# The HTML tag stripper below is thanks to
# http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python
class MLStripper(HTMLParser):
    """HTML parser that discards markup and keeps only the text between tags.

    Feed markup with ``feed()``; the text runs seen so far are accumulated in
    ``self.fed`` and can be read back as one string with ``get_data()``.
    """

    def __init__(self):
        # Run the real base initialiser instead of only reset(): the base
        # class may set up state in __init__ beyond what reset() covers
        # (Python 3 stores ``convert_charrefs`` there, and feed() needs it).
        # The explicit call form works on both Python 2 and Python 3.
        HTMLParser.__init__(self)
        # Text runs in document order, one entry per handle_data() call.
        self.fed = []

    def handle_data(self, d):
        """Record one run of text found outside of any tag."""
        self.fed.append(d)

    def get_data(self):
        """Return all recorded text runs joined with a single space."""
        return ' '.join(self.fed)

def strip_tags(html):
    """Return the text of ``html`` with the markup removed.

    The markup is fed through a fresh ``MLStripper`` and the text runs it
    collected are returned joined with single spaces.
    """
    stripper = MLStripper()
    stripper.feed(html)
    plain_text = stripper.get_data()
    return plain_text

# Review-listing endpoint. The application id is substituted for %s and the
# zero-based page number is appended to the end for every request.
url = "https://play.google.com/store/getreviews?id=%s&reviewSortOrder=2&reviewType=1&pageNum="

# The request body is sent form-encoded (``xhr=1``) so that it agrees with
# this content type. Content-Length is left for requests to compute from the
# real body instead of being hard-coded.
headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8'}

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 30

# Marker that starts every review inside the returned HTML fragment.
REVIEW_SEPARATOR = '<div class="doc-review">'

# Patterns are compiled once here rather than looked up for every review.
# Groups of REVIEW_HEADER_RE: author, date, device, rating.
REVIEW_HEADER_RE = re.compile(
    r'<strong>(.*)</strong>.*doc-review-date"> - (.*)<.* - ([^<]*)<'
    r'.*Rating: ([0-9\.]*) ')
REVIEW_TEXT_RE = re.compile(r'review-text">(.*?)</p>')
REVIEW_TITLE_RE = re.compile(r'review-title">(.*?)</h4>')


def parse_reviews(html):
    """Extract the reviews contained in one page of review markup.

    Args:
        html: the ``htmlContent`` string of one response page.

    Returns:
        A list of dicts with the keys ``date``, ``author``, ``device``,
        ``rating``, ``title`` and ``review`` (all strings). Chunks that do not
        match the review header pattern are skipped; a missing title or text
        is reported as an empty string.
    """
    # Undo a literal backslash-u escape left in the markup. The backslash is
    # doubled so the six-character sequence is matched on Python 2 and 3 alike.
    html = html.replace('\\u003C', '<')
    # NOTE(review): &quot; is deleted rather than turned into '"'. Kept as in
    # the original behaviour -- confirm that dropping quotes is intended.
    html = html.replace('&quot;', "")
    html = html.replace('&#39;', "'")

    parsed = []
    for chunk in html.split(REVIEW_SEPARATOR):
        header = REVIEW_HEADER_RE.search(chunk)
        if not header:
            continue
        author, date, device, rating = header.groups()
        text_match = REVIEW_TEXT_RE.search(chunk)
        title_match = REVIEW_TITLE_RE.search(chunk)
        parsed.append({'date': date,
                       'author': author,
                       'device': device,
                       'rating': rating,
                       'title': title_match.group(1) if title_match else '',
                       'review': text_match.group(1) if text_match else ''})
    return parsed


def fetch_page(page_url):
    """POST to ``page_url`` and return the decoded JSON object of the reply.

    Raises:
        requests.exceptions.RequestException: on network failure, timeout or
            an HTTP error status.
        ValueError: if the reply contains no JSON object.
    """
    response = requests.post(page_url, headers=headers, data={'xhr': 1},
                             timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    body = response.text
    # Everything before the first '{' is skipped, as the original did; a reply
    # without any '{' is reported instead of silently slicing from index -1.
    start = body.find('{')
    if start < 0:
        raise ValueError('no JSON object in response from %s' % page_url)
    return json.loads(body[start:])


def main(argv=None):
    """Download every review page of one app and print the reviews as JSON.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument is the application id, any
            further arguments are ignored.

    Returns:
        The process exit status: 0 on success, 2 when the id is missing.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.stderr.write('usage: %s <application-id>\n' % sys.argv[0])
        return 2
    base_url = url % args[0]

    reviews_list = []
    max_pages = -1  # unknown until the first response has been read
    page = 0
    # Page 0 is always fetched; its 'numPages' field bounds the rest.
    while page == 0 or page < max_pages:
        content = fetch_page(base_url + str(page))
        if max_pages == -1:
            max_pages = content['numPages']
        reviews_list.extend(parse_reviews(content['htmlContent']))
        page += 1

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(json.dumps(reviews_list))
    return 0


if __name__ == '__main__':
    sys.exit(main())