SHARE
TWEET

Untitled

a guest Sep 17th, 2011 132 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import System.Text
  2. import System.Text.RegularExpressions
  3. import System.Web.Script.Serialization
  4. import AlbumArtDownloader.Scripts
  5. import util
  6.  
  7. class ImageInfo:
  8.         public pageUrl as String
  9.         public imageList as List[Image]
  10.         class Image:
  11.                 public url as String
  12.                 public id as String
  13.                 public width as int
  14.                 public height as int
  15.  
  16. //Inheritors should override the Suffix property to return a valid amazon suffix (like com, co.uk, de, etc...).
  17. abstract class Amazon(AlbumArtDownloader.Scripts.IScript):
  18.         private highresResults as Hash
  19.  
  20.         virtual Name as string:
  21.                 get: return "Amazon (.${Suffix})"
  22.         Version as string:
  23.                 get: return "0.8s"
  24.         Author as string:
  25.                 get: return "Alex Vallat"
  26.         abstract protected Suffix as string:
  27.                 get: pass
  28.         virtual protected CountryCode as string:
  29.                 get: return "01"
  30.         virtual protected SearchIndex as string: //Deprectated, ignored.
  31.                 get: return ""
  32.         virtual protected def GetUrl(artist as string, album as string) as string:
  33.                 return "http://www.amazon.${Suffix}/gp/search/ref=sr_adv_m_pop/?search-alias=popular&field-artist=${EncodeUrlIsoLatin1(artist)}&field-title=${EncodeUrlIsoLatin1(album)}&sort=relevancerank"
  34.         virtual protected PageEncoding as Encoding:
  35.                 get: return Encoding.GetEncoding("iso-8859-1")
  36.  
  37.        
  38.         def Search(artist as string, album as string, results as IScriptResults):
  39.                 highresResults = {}
  40.  
  41.                 artist = StripCharacters("&.'\";:?!", artist)
  42.                 album = StripCharacters("&.'\";:?!", album)
  43.                
  44.                 url = GetUrl(artist, album)
  45.                 resultsPage = GetPage(GetPageStream(url, null, true), PageEncoding)
  46.                
  47.                 resultsRegex = Regex("<div\\s[^>]*class\\s*=\\s*\"title\"[^>]*>\\s*<a\\s[^>]*href\\s*=\\s*\"(?<url>[^\"]+?/dp/(?<id>[^/]+)/)[^>]+>\\s*(?<title>.*?)</a>(?:\\s*<span\\s[^>]*class=\"ptBrand\"[^>]*>(?:[^<]*<a\\s[^>]*>)?\\s*(?:by |von |de |di )?(?<artist>[^<]+))?", RegexOptions.Singleline | RegexOptions.IgnoreCase)
  48.                 resultsMatches = resultsRegex.Matches(resultsPage)
  49.                
  50.                 results.EstimatedCount = resultsMatches.Count
  51.                
  52.                 json = JavaScriptSerializer()
  53.  
  54.                 count = 0
  55.                 for resultsMatch as Match in resultsMatches:
  56.                         // We hit a page for each result.  Searches on Amazon should generally return the
  57.                         // item that was searched for quickly if it's going to be found at all, so don't
  58.                         // hammer the server.
  59.                         count++
  60.                         if count > 5:
  61.                                 break
  62.  
  63.                         id = resultsMatch.Groups["id"].Value
  64.                         url = resultsMatch.Groups["url"].Value
  65.                         title = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["title"].Value)
  66.                         artist = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["artist"].Value)
  67.                         imageBase = "http://ecx.images-amazon.com/images/P/${id}.${CountryCode}."
  68.  
  69.                         thumbnail = TryGetImageStream(imageBase + "_THUMB_")
  70.                         highresResults[imageBase] = true
  71.                         results.Add(thumbnail, "${artist} - ${title}", url, -1, -1, imageBase, CoverType.Front)
  72.  
  73.                         images_url = "http://www.amazon.co.jp/gp/customer-media/product-gallery/${id}"
  74.                         imagesPage = GetPage(GetPageStream(images_url, null, true), PageEncoding)
  75.                         jsonRegex = Regex('var state = (?<json>{[^;]*});', RegexOptions.Multiline)
  76.                         for jsonDataMatch as Match in jsonRegex.Matches(imagesPage):
  77.                                 jsonData = jsonDataMatch.Groups["json"].Value
  78.  
  79.                                 // amazon.co.jp uses double-width backslashes when escaping JS strings.  No, really.
  80.                                 jsonData = Regex("\").Replace(jsonData, "\\")
  81.  
  82.                                 test = json.Deserialize[of ImageInfo](jsonData)
  83.                                 if test.pageUrl is null:
  84.                                         results.Add(thumbnail, "${artist} - ${title}", "null", -1, -1, url, CoverType.Front)
  85.                                         continue
  86.                                 for image as ImageInfo.Image in test.imageList:
  87.                                         results.Add(image.url, "${artist} - ${title}",
  88.                                                 images_url + "?currentImageID=${image.id}", image.width, image.height,
  89.                                                 image.url, CoverType.Front)
  90.  
  91.                         // This one contains secondary official images.  (Why are these separate pages?)  This
  92.                         // is disabled; this will double the number of hits to the server, and most of the better
  93.                         // images are user images anyway.
  94. //                      images_url = "http://www.amazon.co.jp/gp/product/images/${id}"
  95. //                      imagesPage = GetPage(GetPageStream(images_url, null, true), PageEncoding)
  96. //                      customerImageRegex = Regex('fetchImage\\("alt_image_(?<idx>[0-9]+)", "(?<url>http[^"]+)"')
  97. //                      imageMatches = customerImageRegex.Matches(imagesPage)
  98. //                      for productMatch as Match in imageMatches:
  99. //                              image_url = productMatch.Groups["url"].Value
  100. //                              idx = productMatch.Groups["idx"].Value
  101. //                              results.Add(image_url, "${artist} - ${title}", url + "?img=" + idx, -1, -1, image_url, CoverType.Front)
  102.  
  103.         def RetrieveFullSizeImage(imageBase):
  104.                 if not highresResults.ContainsKey(imageBase):
  105.                         return TryGetImageStream(imageBase)
  106.  
  107.                 imageStream = TryGetImageStream(imageBase + "_SCRM_")
  108.                 if imageStream != null:
  109.                         return imageStream
  110.  
  111.                 //Fall back on Large size
  112.                 return TryGetImageStream(imageBase + "_SCL_")
  113.  
  114.         def TryGetImageStream(url):
  115.                 request as System.Net.HttpWebRequest = System.Net.HttpWebRequest.Create(url)
  116.                 try:
  117.                         response = request.GetResponse()
  118.                         if response.ContentLength > 43:
  119.                                 return response.GetResponseStream()
  120.                        
  121.                         response.Close()
  122.                         return null
  123.                 except e as System.Net.WebException:
  124.                         return null
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top