Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import System.Text
- import System.Text.RegularExpressions
- import System.Web.Script.Serialization
- import AlbumArtDownloader.Scripts
- import util
// Deserialization target for the "var state = {...}" JSON blob embedded in
// Amazon's customer-media product-gallery pages (see Amazon.Search).
// Field names must match the JSON keys exactly — JavaScriptSerializer maps by name.
class ImageInfo:
	public pageUrl as String        // gallery page this state blob came from
	public imageList as List[Image] // customer images listed on the page

	// One customer-uploaded image entry within the gallery state.
	class Image:
		public url as String  // full-size image URL
		public id as String   // gallery image id (used for ?currentImageID= links)
		public width as int   // reported pixel width
		public height as int  // reported pixel height
- //Inheritors should override the Suffix property to return a valid amazon suffix (like com, co.uk, de, etc...).
// Inheritors should override the Suffix property to return a valid amazon suffix
// (like com, co.uk, de, etc...).
abstract class Amazon(AlbumArtDownloader.Scripts.IScript):
	// Keys are product-shot image bases that accept Amazon size suffixes
	// (_THUMB_/_SCRM_/_SCL_); RetrieveFullSizeImage uses this to tell them
	// apart from plain full-size customer-image URLs.
	private highresResults as Hash

	virtual Name as string:
		get: return "Amazon (.${Suffix})"

	Version as string:
		get: return "0.8s"

	Author as string:
		get: return "Alex Vallat"

	// Amazon domain suffix ("com", "co.uk", "de", ...); each concrete
	// subclass must supply one.
	abstract protected Suffix as string:
		get: pass

	// Country code segment of the images-amazon.com product image path.
	virtual protected CountryCode as string:
		get: return "01"

	virtual protected SearchIndex as string: //Deprecated, ignored.
		get: return ""

	// Builds the search-results URL for the given (already-sanitized) artist/album.
	virtual protected def GetUrl(artist as string, album as string) as string:
		return "http://www.amazon.${Suffix}/gp/search?search-alias=popular&field-artist=${EncodeUrl(artist, PageEncoding)}&field-keywords=${EncodeUrl(album, PageEncoding)}&sort=relevancerank"

	// Encoding used when fetching and URL-encoding for Amazon pages.
	virtual protected PageEncoding as Encoding:
		get: return Encoding.GetEncoding("iso-8859-1")

	// Searches Amazon for the album and reports results:
	// pass 1 adds the standard product shot for every match,
	// pass 2 (first 5 matches only) scrapes customer image galleries.
	def Search(artist as string, album as string, results as IScriptResults):
		highresResults = {}

		// Strip punctuation that disrupts Amazon's search matching.
		artist = StripCharacters("&.'\";:?!", artist)
		album = StripCharacters("&.'\";:?!", album)

		url = GetUrl(artist, album)
		resultsPage = GetPage(GetPageStream(url, null, true), PageEncoding)

		// Captures each result's product link (url, /dp/ id), title, and an
		// optional "by/von/de/di <artist>" byline.
		resultsRegex = Regex("<div\\s[^>]*class\\s*=\\s*\"title\"[^>]*>\\s*<a\\s[^>]*href\\s*=\\s*\"(?<url>[^\"]+?/dp/(?<id>[^/]+)/)[^>]+>\\s*(?<title>.*?)</a>(?:\\s*<span\\s[^>]*class=\"ptBrand\"[^>]*>(?:[^<]*<a\\s[^>]*>)?\\s*(?:by |von |de |di )?(?<artist>[^<]+))?", RegexOptions.Singleline | RegexOptions.IgnoreCase)
		resultsMatches = resultsRegex.Matches(resultsPage)
		results.EstimatedCount = resultsMatches.Count

		json = JavaScriptSerializer()

		// Pass 1: report the standard product-shot thumbnail for every result.
		for resultsMatch as Match in resultsMatches:
			id = resultsMatch.Groups["id"].Value
			url = resultsMatch.Groups["url"].Value
			title = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["title"].Value)
			artist = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["artist"].Value)

			imageBase = "http://ecx.images-amazon.com/images/P/${id}.${CountryCode}."
			thumbnail = TryGetImageStream(imageBase + "_THUMB_")
			// Remember this base so RetrieveFullSizeImage knows to try the
			// _SCRM_/_SCL_ size suffixes later.
			highresResults[imageBase] = true
			results.Add(thumbnail, "${artist} - ${title}", url, -1, -1, imageBase, CoverType.Front)

		// Pass 2: customer image galleries — one extra page hit per result.
		count = 0
		for resultsMatch as Match in resultsMatches:
			// We hit a page for each result. Searches on Amazon should generally return the
			// item that was searched for quickly if it's going to be found at all, so don't
			// hammer the server.
			count += 1
			if count > 5:
				break

			id = resultsMatch.Groups["id"].Value
			title = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["title"].Value)
			artist = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["artist"].Value)

			// FIX: was "www.amazon.co.${Suffix}", which produces broken hosts
			// like "amazon.co.com" / "amazon.co.co.uk"; GetUrl shows the
			// correct form is "www.amazon.${Suffix}".
			images_url = "http://www.amazon.${Suffix}/gp/customer-media/product-gallery/${id}"
			imagesPage = GetPage(GetPageStream(images_url, null, true), PageEncoding)

			// The gallery embeds its data as a JS object literal: var state = {...};
			jsonRegex = Regex('var state = (?<json>{[^;]*});', RegexOptions.Multiline)
			for jsonDataMatch as Match in jsonRegex.Matches(imagesPage):
				jsonData = jsonDataMatch.Groups["json"].Value
				// amazon.co.jp uses double-width backslashes when escaping JS strings. No, really.
				// NOTE(review): the pasted source had an unterminated pattern here;
				// replacing the fullwidth backslash (U+FF3C) with an ASCII one
				// matches the comment's intent — confirm against a live
				// amazon.co.jp gallery page.
				jsonData = Regex("＼").Replace(jsonData, "\\")
				result = json.Deserialize[of ImageInfo](jsonData)
				for image as ImageInfo.Image in result.imageList:
					thumbnail_url = image.url
					// Ask Amazon's image server for a 120px-wide variant as the thumbnail.
					thumbnail_url = Regex("\\.jpg$").Replace(thumbnail_url, "._SX120_.jpg")
					results.Add(thumbnail_url, "${artist} - ${title}",
						images_url + "?currentImageID=${image.id}", image.width, image.height,
						image.url, CoverType.Front)

			// This one contains secondary official images. (Why are these separate pages?) This
			// is disabled; this will double the number of hits to the server, and most of the better
			// images are user images anyway.
			// images_url = "http://www.amazon.${Suffix}/gp/product/images/${id}"
			// imagesPage = GetPage(GetPageStream(images_url, null, true), PageEncoding)
			// customerImageRegex = Regex('fetchImage\\("alt_image_(?<idx>[0-9]+)", "(?<url>http[^"]+)"')
			// imageMatches = customerImageRegex.Matches(imagesPage)
			// for productMatch as Match in imageMatches:
			//     image_url = productMatch.Groups["url"].Value
			//     idx = productMatch.Groups["idx"].Value
			//     results.Add(image_url, "${artist} - ${title}", url + "?img=" + idx, -1, -1, image_url, CoverType.Front)

	// Resolves the full-size image for a result added by Search.
	// Product-shot bases get the _SCRM_ (hi-res) suffix with an _SCL_ (large)
	// fallback; anything else is already a direct image URL.
	def RetrieveFullSizeImage(imageBase):
		if not highresResults.ContainsKey(imageBase):
			return TryGetImageStream(imageBase)

		imageStream = TryGetImageStream(imageBase + "_SCRM_")
		if imageStream != null:
			return imageStream
		//Fall back on Large size
		return TryGetImageStream(imageBase + "_SCL_")

	// Fetches url and returns the response stream, or null on failure.
	// The >43-byte check filters out Amazon's tiny placeholder response for
	// missing images (presumably a 43-byte 1x1 GIF — TODO confirm).
	def TryGetImageStream(url):
		request as System.Net.HttpWebRequest = System.Net.HttpWebRequest.Create(url)
		try:
			response = request.GetResponse()
			if response.ContentLength > 43:
				return response.GetResponseStream() // caller owns the stream
			response.Close()
			return null
		except e as System.Net.WebException:
			return null
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement