Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Headless browser used to drive the developer portal.
var Zombie = require("zombie");

// User agent string handed to every Zombie browser instance.
var USERAGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.76 Safari/537.36";

// Credentials used by the scrape that runs when this file is loaded.
var testUser = {
    email: "***",
    password: "***"
};
var amazon = module.exports = {
    /**
     * Signs in to the Amazon developer dashboard and hands back the
     * browser so the authenticated session can be reused.
     *
     * @param {{email: string, password: string}} user - values typed into the sign-in form
     * @param {function} callback - called as callback(error, browser)
     */
    authenticate: function(user, callback) {
        var browser = new Zombie({
            loadCSS: false,
            runScripts: false,
            maxWait: 30,
            waitFor: 1000,
            userAgent: USERAGENT,
            windowName: "chrome",
            site: "https://developer.amazon.com/"
        });

        // go to the sign in page
        setLocation(browser, user, "/login.html", function(error) {
            if (error) {
                console.log("[authenticate] error signing in", error, browser.error);
                return callback(error || browser.error || "Missing page body");
            }

            // do the sign in
            browser
                .fill("#ap_email", user.email)
                .fill("#ap_password", user.password)
                .pressButton("#signInSubmit-input", function(submitError) {
                    if (submitError) {
                        // report the failure instead of pretending the sign in worked
                        console.log("[authenticate] error submitting sign in form", submitError, browser.error);
                        return callback(submitError, browser);
                    }
                    callback(null, browser);
                });
        });
    },

    /**
     * Scrapes a complete list of the user's games with
     * versionId and title data.
     *
     * @param {Object} user - credentials, used if a session has to be created
     * @param {Object} browser - an existing browser, or null to sign in first
     * @param {function} callback - called as callback(error, browser, games) where
     *     games is an array of {title, versionId}
     */
    gameList: function(user, browser, callback) {
        if (!browser || browser.location.pathname !== "/myapps.html") {
            return setLocation(browser, user, "/myapps.html", function(error, page) {
                if (error) {
                    console.log("[gameList] error setting initial page", error, page && page.error);
                    return callback(error || (page && page.error) || "Missing page body");
                }
                if (page.location.pathname !== "/myapps.html") {
                    // the visit ended up somewhere else; calling gameList again
                    // would only repeat the same visit forever
                    return callback("Unexpected page " + page.location.pathname);
                }
                return amazon.gameList(user, page, callback);
            });
        }

        console.log("[gameList] ready to start processing");

        // get the number of pages; without a pagination nav there is a single page
        var nav = browser.queryAll("nav[class=pagination]")[0];
        var pageLinks = nav ? browser.queryAll("a", nav) : [];
        var pagePrefix = null;
        var numPages = 1;
        if (pageLinks.length > 0) {
            pagePrefix = pageLinks[0].href;
            pagePrefix = pagePrefix.substring(0, pagePrefix.indexOf("=") + 1);
            // four of the links are the first/previous and next/last arrows,
            // and the current page is not counted among the links
            numPages = Math.max(1, pageLinks.length + 1 - 4);
        }

        // scrape each page
        var currentPage = 1;
        var games = [];
        var retries = 0;

        function retry() {
            browser.reload(scrape);
        }

        function next() {
            currentPage++;
            // the links collected above belong to a document that has since been
            // replaced, so navigate by URL rather than clicking them
            browser.visit(pagePrefix + currentPage, scrape);
        }

        function scrape(error) {
            if (error || browserError(browser)) {
                // no browser.html() here: the body may be exactly what failed to load
                console.log("[gameList] error scraping page", currentPage, numPages, error, browser.error);
                retries++;
                if (retries < 10) {
                    console.log("[gameList] retrying page", currentPage, numPages);
                    return tryAgain(retry, retries);
                }
                return callback(error || browser.error || "Missing page body");
            }

            console.log("[gameList] scraping page", currentPage, numPages, browser.html());
            var rows = browser.queryAll("td[class=row]");
            retries = 0;

            for (var r = 0; r < rows.length; r++) {
                console.log(rows[r]);
                var link = browser.query("a", rows[r]);
                if (!link) {
                    // not a game row
                    continue;
                }
                var label = browser.query("span", link);
                var versionId = link.href.replace("https://developer.amazon.com/application/general/", "").replace("/detail.html", "");
                games.push({
                    title: (label || link).innerHTML,
                    versionId: versionId
                });
            }

            if (currentPage < numPages) {
                return setTimeout(next, sleep());
            }

            // finished
            console.log("[gameList] finished", games.length + " games");
            return callback(null, browser, games);
        }

        if (pagePrefix === null) {
            // single page: it is already loaded
            return scrape();
        }

        // load the first page explicitly and only scrape once it has arrived
        setLocation(browser, user, pagePrefix + currentPage, function(error) {
            scrape(error);
        });
    },

    /**
     * Scrapes a user's games and collects all the meta
     * data for it. The objects in `games` are extended in place.
     *
     * @param {Object} user - credentials, used if a session has to be created
     * @param {Array} games - entries with a versionId, as produced by gameList
     * @param {Object} browser - an existing browser, or null to sign in first
     * @param {function} callback - called as callback(error, browser, games)
     */
    gameData: function(user, games, browser, callback) {
        var gameData = {};
        var gameIds = [];
        var list = games || [];

        for (var g = 0; g < list.length; g++) {
            gameData[list[g].versionId] = list[g];
            gameIds.push(list[g].versionId);
        }

        if (gameIds.length === 0) {
            // nothing to scrape
            return callback(null, browser, []);
        }

        // Trimmed innerHTML of the first element matching selector, or "" when
        // the element is not on the page.
        function fieldText(selector, context) {
            var node = browser.query(selector, context);
            return node ? node.innerHTML.trim() : "";
        }

        // Runs every job for one game, then moves on to the next game.
        function scrape() {
            var gameId = gameIds.shift();
            var game = gameData[gameId];
            var jobs = [pricing, text, general, releaseNotes, contentRatings, media];

            setLocation(browser, user, "/application/general/" + gameId + "/detail.html", function(error, page) {
                if (error) {
                    console.log("[gameData] error setting initial page", error, page && page.error);
                    return callback(error || (page && page.error) || "Missing page body");
                }
                // keep the session setLocation may have created for us
                browser = page;
                next();
            });

            function done() {
                if (jobs.length > 0) {
                    // next job
                    return setTimeout(next, sleep());
                }
                if (gameIds.length > 0) {
                    // next game
                    return setTimeout(scrape, sleep());
                }

                // no more games: hand back the collected objects, not their keys
                var collected = [];
                for (var id in gameData) {
                    if (Object.prototype.hasOwnProperty.call(gameData, id)) {
                        collected.push(gameData[id]);
                    }
                }
                return callback(null, browser, collected);
            }

            function next() {
                return jobs.shift()(gameId, game, done);
            }
        }

        // pricing data
        function pricing(gameId, game, done) {
            var retries = 0;

            browser.clickLink("#header_nav_availability_pricing_a", handlePage);

            function retry() {
                browser.reload(handlePage);
            }

            function handlePage(error) {
                if (error) {
                    console.log("[gameData.pricing] error processing page", error, browser.error);
                    retries++;
                    if (retries < 10) {
                        console.log("[gameData.pricing] retrying page");
                        return tryAgain(retry, retries);
                    }
                    return callback(error || browser.error);
                }

                game.pricing = {};
                var prow = browser.query("#ro_cal_prices");
                var pValues = browser.queryAll("span[class=pricing-val]", prow);
                var pCurrencies = browser.queryAll("span[class=pricing-currency]", prow);
                var count = Math.min(pValues.length, pCurrencies.length);

                for (var p = 0; p < count; p++) {
                    var curr = pCurrencies[p].innerHTML.trim();
                    // drop the first character of the displayed value
                    var price = pValues[p].innerHTML.trim().substring(1);
                    game.pricing[curr] = price;
                }

                // TODO: availability
                done();
            }
        }

        // text data
        function text(gameId, game, done) {
            var retries = 0;

            browser.clickLink("#header_nav_description_a", handlePage);

            function retry() {
                browser.reload(handlePage);
            }

            // Reads the description fields from the page currently loaded.
            function readListing(language) {
                return {
                    language: language,
                    title: fieldText("#ro_display_title"),
                    shortDesc: fieldText("#ro_short_desc"),
                    longDesc: fieldText("#ro_long_desc"),
                    features: fieldText("#ro_bullets"),
                    keywords: fieldText("#ro_keywords")
                };
            }

            function handlePage(error) {
                if (error) {
                    console.log("[gameData.text] error processing page", error, browser.error);
                    retries++;
                    if (retries < 10) {
                        console.log("[gameData.text] retrying page");
                        return tryAgain(retry, retries);
                    }
                    return callback(error || browser.error);
                }

                // get the languages
                var langList = browser.query("#collectable_nav_list");
                var langLinks = browser.queryAll("a", langList);
                var pending = [];
                var langNames = {};
                game.languages = {};

                for (var l = 0; l < langLinks.length; l++) {
                    var lid = langLinks[l].href.replace("https://developer.amazon.com/application/description/", "")
                        .replace("/detail.html", "");
                    langNames[lid] = langLinks[l].innerHTML.trim();
                    pending.push(lid);
                }

                // the page we are on holds the language shown in the span
                game.languages[gameId] = readListing(fieldText("span", langList));

                function scrapeLanguage() {
                    var langId = pending.shift();
                    browser.visit("/application/description/" + langId + "/detail.html", function(visitError) {
                        if (visitError || browser.error) {
                            return callback(visitError || browser.error);
                        }
                        game.languages[langId] = readListing(langNames[langId]);
                        if (pending.length > 0) {
                            return setTimeout(scrapeLanguage, 500);
                        }
                        return done();
                    });
                }

                if (pending.length > 0) {
                    return scrapeLanguage();
                }
                return done();
            }
        }

        // general data
        function general(gameId, game, done) {
            browser.visit("/application/general/" + gameId + "/detail.html", function(error) {
                if (error || browser.error) {
                    return callback(error || browser.error);
                }

                game.title = fieldText("#ro_title");
                game.sku = fieldText("#ro_sku");
                game.privacy = fieldText("#ro_privacy_policy_url");
                game.categories = fieldText("#ro_category").split(" - ");
                game.support = {
                    email: fieldText("#ro_support_email"),
                    phone: fieldText("#ro_support_phone"),
                    website: fieldText("#ro_support_website")
                };

                // app key doesn't have an ID on the td, so find the cell after its label
                var titleNode = browser.query("#ro_title");
                var tds = titleNode ? browser.queryAll("td", titleNode.parentNode) : [];
                for (var c = 0; c < tds.length; c++) {
                    if (tds[c].innerHTML.indexOf("Application Key") > -1) {
                        if (tds[c + 1]) {
                            game.appKey = tds[c + 1].innerHTML.trim();
                        }
                        break;
                    }
                }

                done();
            });
        }

        // release notes, stored next to the description text of each language
        function releaseNotes(gameId, game, done) {
            // Adds the release notes of the page currently loaded to the entry for
            // langId, creating the entry when the text job did not produce one.
            function recordNotes(langId, language) {
                var entry = game.languages[langId];
                if (!entry || typeof entry !== "object") {
                    entry = game.languages[langId] = { language: language };
                }
                entry.releaseNotes = fieldText("#ro_release_notes");
            }

            browser.visit("/application/releasenotes/" + gameId + "/detail.html?default", function(error) {
                if (error || browser.error) {
                    return callback(error || browser.error);
                }

                // get the languages
                var langList = browser.query("#collectable_nav_list");
                var langLinks = browser.queryAll("a", langList);
                var pending = [];
                var langNames = {};
                // keep whatever the text job already collected
                game.languages = game.languages || {};

                for (var l = 0; l < langLinks.length; l++) {
                    var lid = langLinks[l].href.replace("https://developer.amazon.com/application/releasenotes/", "").replace("/detail.html", "");
                    langNames[lid] = langLinks[l].innerHTML.trim();
                    pending.push(lid);
                }

                recordNotes(gameId, fieldText("span", langList));

                function scrapeLanguage() {
                    var langId = pending.shift();
                    browser.visit("/application/releasenotes/" + langId + "/detail.html", function(visitError) {
                        if (visitError || browser.error) {
                            return callback(visitError || browser.error);
                        }
                        recordNotes(langId, langNames[langId]);
                        if (pending.length > 0) {
                            return setTimeout(scrapeLanguage, 500);
                        }
                        return done();
                    });
                }

                if (pending.length > 0) {
                    return scrapeLanguage();
                }
                return done();
            });
        }

        // content ratings
        function contentRatings(gameId, game, done) {
            browser.visit("/application/rating/" + gameId + "/detail.html", function(error) {
                if (error || browser.error) {
                    return callback(error || browser.error);
                }

                game.contentRatings = {
                    alcohol: fieldText("#ro_maturity_field_2"),
                    cartoonViolence: fieldText("#ro_maturity_field_3"),
                    intolerance: fieldText("#ro_maturity_field_4"),
                    nudity: fieldText("#ro_maturity_field_7"),
                    profanity: fieldText("#ro_maturity_field_8"),
                    realisticViolence: fieldText("#ro_maturity_field_9"),
                    sex: fieldText("#ro_maturity_field_10")
                };

                // other ratings are detected by their label text
                var others = fieldText("#ro_maturity_boolean_flags");
                game.contentRatings.others = {
                    pii: others.indexOf("Account creation or other personal information collection") > -1,
                    ads: others.indexOf("Advertisements") > -1,
                    gambling: others.indexOf("Gambling") > -1,
                    location: others.indexOf("Location detection or Location Based Services") > -1,
                    userContent: others.indexOf("User Generated Content or User to User Communication") > -1
                };

                done();
            });
        }

        // media: nothing is collected yet
        function media(gameId, game, done) {
            done();
        }

        scrape();
    }
};
/**
 * Sets the browser location to the necessary page, signing in first
 * when no browser is supplied. Failed attempts are rescheduled through
 * tryAgain until the tenth failure, which is reported to the callback.
 *
 * @param {Object} browser - an existing browser, or null/undefined to authenticate
 * @param {Object} user - credentials passed to amazon.authenticate when needed
 * @param {string} url - location handed to browser.visit
 * @param {function} callback - called as callback(error, browser)
 * @param {number} [retries] - failures so far; defaults to 0
 */
function setLocation(browser, user, url, callback, retries) {
    // default first so the log line shows a number rather than undefined
    retries = retries || 0;
    console.log("[setLocation] setting location", url, retries);

    function retry() {
        setLocation(browser, user, url, callback, retries);
    }

    // get an authenticated session if we're not provided one
    if (!browser) {
        return amazon.authenticate(user, function(error, session) {
            if (error || browserError(session)) {
                retries++;
                if (retries < 10) {
                    return tryAgain(retry, retries);
                }
                // session may be missing altogether here, so guard the property read
                return callback(error || (session && session.error) || "No document body", session);
            }
            return setLocation(session, user, url, callback);
        });
    }

    browser.visit(url, function(error) {
        if (error || browserError(browser)) {
            console.log("[setLocation] error visiting page", error, browser.error);
            retries++;
            if (retries < 10) {
                console.log("[setLocation] retrying");
                return tryAgain(retry, retries);
            }
            return callback(error || browser.error || "No document body", browser);
        }
        callback(null, browser);
    });
}
/**
 * A sleep timer for some period of time for use between
 * pageviews.
 *
 * @param {number} [min] - minimum delay in milliseconds; defaults to 1000
 *     when omitted or not a positive number
 * @returns {number} min plus a random whole number in the range 0 to 1999
 */
function sleep(min) {
    // the fallback has to come second, otherwise min is never used
    var base = (typeof min === "number" && min > 0) ? min : 1000;
    return base + Math.floor(Math.random() * 2000);
}
/**
 * Schedules func to run later. The delay comes from sleep(), which is
 * called with numRetries * 1000 as its argument.
 *
 * @param {function} func - the function to schedule
 * @param {number} numRetries - number of attempts made so far
 * @returns {*} whatever setTimeout returns
 */
function tryAgain(func, numRetries) {
    var delay = sleep(1000 * numRetries);
    return setTimeout(func, delay);
}
/**
 * Detects a browser whose document cannot be used: either there is no
 * browser at all, or querying it for links throws.
 *
 * @param {Object} browser - the browser to probe
 * @returns {boolean} true when the browser is null or the query throws
 */
function browserError(browser) {
    var failed = true;

    if (browser !== null) {
        try {
            browser.queryAll("a");
            failed = false;
        } catch (ignored) {
            // the query blew up, so the document is not usable
        }
    }

    return failed;
}
// Prints the first fully scraped game, or the error that stopped the scrape.
function onGameData(error, browser, games) {
    if (error) {
        return console.log("[amazon.gameData error]", error);
    }
    console.log(JSON.stringify(games[0]));
}

// Passes the scraped list on to the detail scrape.
function onGameList(error, browser, games) {
    if (error) {
        return console.log("[amazon.gameList error]", error);
    }
    // add data
    amazon.gameData(testUser, games, browser, onGameData);
}

// get the game list
amazon.gameList(testUser, null, onGameList);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement