SHARE
TWEET

Untitled

a guest Sep 18th, 2011 59 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import System.Text
  2. import System.Text.RegularExpressions
  3. import System.Web.Script.Serialization
  4. import AlbumArtDownloader.Scripts
  5. import util
  6.  
  7. class ImageInfo:
  8.         public pageUrl as String
  9.         public imageList as List[Image]
  10.         class Image:
  11.                 public url as String
  12.                 public id as String
  13.                 public width as int
  14.                 public height as int
  15.  
  16. //Inheritors should override the Suffix property to return a valid amazon suffix (like com, co.uk, de, etc...).
  17. abstract class Amazon(AlbumArtDownloader.Scripts.IScript):
  18.         private highresResults as Hash
  19.  
  20.         virtual Name as string:
  21.                 get: return "Amazon (.${Suffix})"
  22.         Version as string:
  23.                 get: return "0.8s"
  24.         Author as string:
  25.                 get: return "Alex Vallat"
  26.         abstract protected Suffix as string:
  27.                 get: pass
  28.         virtual protected CountryCode as string:
  29.                 get: return "01"
  30.         virtual protected SearchIndex as string: //Deprectated, ignored.
  31.                 get: return ""
  32.         virtual protected def GetUrl(artist as string, album as string) as string:
  33.                 return "http://www.amazon.${Suffix}/gp/search?search-alias=popular&field-artist=${EncodeUrl(artist, PageEncoding)}&field-keywords=${EncodeUrl(album, PageEncoding)}&sort=relevancerank"
  34.         virtual protected PageEncoding as Encoding:
  35.                 get: return Encoding.GetEncoding("iso-8859-1")
  36.  
  37.        
  38.         def Search(artist as string, album as string, results as IScriptResults):
  39.                 highresResults = {}
  40.  
  41.                 artist = StripCharacters("&.'\";:?!", artist)
  42.                 album = StripCharacters("&.'\";:?!", album)
  43.                
  44.                 url = GetUrl(artist, album)
  45.                 resultsPage = GetPage(GetPageStream(url, null, true), PageEncoding)
  46.                
  47.                 resultsRegex = Regex("<div\\s[^>]*class\\s*=\\s*\"title\"[^>]*>\\s*<a\\s[^>]*href\\s*=\\s*\"(?<url>[^\"]+?/dp/(?<id>[^/]+)/)[^>]+>\\s*(?<title>.*?)</a>(?:\\s*<span\\s[^>]*class=\"ptBrand\"[^>]*>(?:[^<]*<a\\s[^>]*>)?\\s*(?:by |von |de |di )?(?<artist>[^<]+))?", RegexOptions.Singleline | RegexOptions.IgnoreCase)
  48.                 resultsMatches = resultsRegex.Matches(resultsPage)
  49.                
  50.                 results.EstimatedCount = resultsMatches.Count
  51.                
  52.                 json = JavaScriptSerializer()
  53.  
  54.                 for resultsMatch as Match in resultsMatches:
  55.                         id = resultsMatch.Groups["id"].Value
  56.                         url = resultsMatch.Groups["url"].Value
  57.                         title = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["title"].Value)
  58.                         artist = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["artist"].Value)
  59.                         imageBase = "http://ecx.images-amazon.com/images/P/${id}.${CountryCode}."
  60.  
  61.                         thumbnail = TryGetImageStream(imageBase + "_THUMB_")
  62.                         highresResults[imageBase] = true
  63.  
  64.                         results.Add(thumbnail, "${artist} - ${title}", url, -1, -1, imageBase, CoverType.Front)
  65.  
  66.                 count = 0
  67.                 for resultsMatch as Match in resultsMatches:
  68.                         // We hit a page for each result.  Searches on Amazon should generally return the
  69.                         // item that was searched for quickly if it's going to be found at all, so don't
  70.                         // hammer the server.
  71.                         count++
  72.                         if count > 5:
  73.                                 break
  74.  
  75.                         id = resultsMatch.Groups["id"].Value
  76.                         url = resultsMatch.Groups["url"].Value
  77.                         title = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["title"].Value)
  78.                         artist = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["artist"].Value)
  79.                         imageBase = "http://ecx.images-amazon.com/images/P/${id}.${CountryCode}."
  80.  
  81.                         images_url = "http://www.amazon.co.${Suffix}/gp/customer-media/product-gallery/${id}"
  82.                         imagesPage = GetPage(GetPageStream(images_url, null, true), PageEncoding)
  83.                         jsonRegex = Regex('var state = (?<json>{[^;]*});', RegexOptions.Multiline)
  84.                         for jsonDataMatch as Match in jsonRegex.Matches(imagesPage):
  85.                                 jsonData = jsonDataMatch.Groups["json"].Value
  86.  
  87.                                 // amazon.co.jp uses double-width backslashes when escaping JS strings.  No, really.
  88.                                 jsonData = Regex("\").Replace(jsonData, "\\")
  89.  
  90.                                 result = json.Deserialize[of ImageInfo](jsonData)
  91.                                 for image as ImageInfo.Image in result.imageList:
  92.                                         thumbnail_url = image.url
  93.                                         thumbnail_url = Regex("\\.jpg$").Replace(thumbnail_url, "._SX120_.jpg")
  94.  
  95.                                         results.Add(thumbnail_url, "${artist} - ${title}",
  96.                                                 images_url + "?currentImageID=${image.id}", image.width, image.height,
  97.                                                 image.url, CoverType.Front)
  98.  
  99.                         // This one contains secondary official images.  (Why are these separate pages?)  This
  100.                         // is disabled; this will double the number of hits to the server, and most of the better
  101.                         // images are user images anyway.
  102. //                      images_url = "http://www.amazon.co.${Suffix}/gp/product/images/${id}"
  103. //                      imagesPage = GetPage(GetPageStream(images_url, null, true), PageEncoding)
  104. //                      customerImageRegex = Regex('fetchImage\\("alt_image_(?<idx>[0-9]+)", "(?<url>http[^"]+)"')
  105. //                      imageMatches = customerImageRegex.Matches(imagesPage)
  106. //                      for productMatch as Match in imageMatches:
  107. //                              image_url = productMatch.Groups["url"].Value
  108. //                              idx = productMatch.Groups["idx"].Value
  109. //                              results.Add(image_url, "${artist} - ${title}", url + "?img=" + idx, -1, -1, image_url, CoverType.Front)
  110.  
  111.         def RetrieveFullSizeImage(imageBase):
  112.                 if not highresResults.ContainsKey(imageBase):
  113.                         return TryGetImageStream(imageBase)
  114.  
  115.                 imageStream = TryGetImageStream(imageBase + "_SCRM_")
  116.                 if imageStream != null:
  117.                         return imageStream
  118.  
  119.                 //Fall back on Large size
  120.                 return TryGetImageStream(imageBase + "_SCL_")
  121.  
  122.         def TryGetImageStream(url):
  123.                 request as System.Net.HttpWebRequest = System.Net.HttpWebRequest.Create(url)
  124.                 try:
  125.                         response = request.GetResponse()
  126.                         if response.ContentLength > 43:
  127.                                 return response.GetResponseStream()
  128.                        
  129.                         response.Close()
  130.                         return null
  131.                 except e as System.Net.WebException:
  132.                         return null
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top