Advertisement
tastypear

抓些playstore数据

May 13th, 2012
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.58 KB | None | 0 0
# Windows-only script (uses a D:\ path and the "mkdir" shell command).
# Written for Python 2 (urllib2, raw_input, print statement); downloads are
# delegated to a curl.exe that is expected to sit in the `local` directory.
import urllib2
import os
local = "D:\\apks\\"  # base directory: holds curl.exe and receives one sub-folder per app
logoWidth = 124 # width appended to the logo URL as "=w<width>"; MAX 512
imgNum = 2 # Based on app; NOTE(review): not referenced anywhere in the visible script
url = raw_input("play Uri\n")  # Play Store details URL typed by the user
  9.  
  10. def getPkgName(url):
  11.     pkgNameB = url.index("=")+1
  12.     if '&' in url:
  13.         pkgNameE = url.index('&')
  14.     else:
  15.         pkgNameE = len(url)
  16.     return url[pkgNameB:pkgNameE]
  17. def getUrl(pkg):
  18.     return "http://play.google.com/store/apps/details?id=" + pkg + "&hl=en"
  19. def getContent(url):
  20.     return urllib2.urlopen(url).read()
  21. def getVer(content):
  22.     verB = content.index("softwareVersion")+17
  23.     verE = content.find("<",verB)
  24.     return content[verB:verE]
  25. def getLogo(content):
  26.     logoB = content.index("doc-banner-icon")+27
  27.     logoE = content.find("=",logoB)
  28.     return (content[logoB:logoE] + "=w" + repr(logoWidth)).replace("https","http")
  29. def getSys(content):
  30.     sysB = content.find("<dd>",content.index("operatingSystems"))+4
  31.     sysE = content.find("</dd>",sysB)
  32.     return content[sysB:sysE]
  33. def getCat(content):
  34.     catB = content.find("<dd>",content.index("Category:"))+4
  35.     catB = content.find(">",catB)+1
  36.     catE = content.find("</a>",catB)
  37.     return content[catB:catE].replace("&amp;","&")
  38. def getPrice(content):
  39.     priceB = content.index("buy-button-price")+16
  40.     priceB = content.find(">",priceB)+1
  41.     priceE = content.find("span>",priceB)-2
  42.     return content[priceB:priceE].replace(" Buy","").replace("Install","$0")
  43. def getImg(content):
  44.     sliceB = content.index("screenshot-carousel-content-container")+39
  45.     sliceE = content.find("screenshot-carousel-right-fade",sliceB)-78
  46.     imgContainer = content[sliceB:sliceE]
  47.     imgContainer = imgContainer.split("=h")
  48.     result = []
  49.     i = 0
  50.     while( i < len(imgContainer)):
  51.         if "data-baseUrl=" in imgContainer[i]:
  52.             posB = imgContainer[i].index("data-baseUrl=")+14
  53.             posE = imgContainer[i].index("title")-2
  54.             result.append((imgContainer[i][posB:posE]).replace("https","http"))
  55.         i = i+1
  56.     return result
  57. def getTitle(content):
  58.     titleB = content.index("<h1 class=\"doc-banner-title\">")+29
  59.     titleE = content.find("</h1>",titleB)
  60.     return content[titleB:titleE]
  61.  
  62. pkgName = getPkgName(url)
  63. content = getContent(getUrl(pkgName))
  64.  
  65. title = getTitle(content)
  66. apkdir = local + title
  67. os.system("mkdir " +"\""+apkdir +"\"")
  68.  
  69. logo = getLogo(content)
  70. os.system(local+"curl.exe -o \"" + apkdir +"\\logo.png\" "+ logo)
  71.  
  72. img = getImg(content)
  73. i = 0
  74. while( i < len(img)):
  75.     os.system(local+"curl.exe -o \"" + apkdir +"\\pic"+ repr(i) + ".png\" "+ img[i])
  76.     i = i+1
  77. #os.system('cls')
  78.  
  79. cat = getCat(content)
  80. sys = getSys(content)
  81. ver = getVer(content)
  82. price = getPrice(content)
  83. print title
  84. print cat
  85. print sys
  86. print pkgName
  87. print ver
  88. print price
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement