// iamabear
//
// [mTurk userscript] Pending Earnings
//
// Oct 17th, 2012
// 404
// 0
// Never
// Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // ==UserScript==
  2. // @name           Pending Earnings
  3. // @namespace      http://bluesky.software.com/turkscripts
  4. // @description    Adds a pending earnings item to mturk dashboard
  5. // @include        https://www.mturk.com/mturk/dashboard
  6. // ==/UserScript==
  7.  
  8. //
  9. // We are on the dashboard page. We want to go to the status_detail
  10. // pages for the last 30 days and total the dollar amount of the
  11. // HITs still pending and add it to the dashboard page like the today's projected earnings script
  12. // does. We will use the XMLHttpRequest Object to get the pages and then
  13. // process them one by one until we have done them all. An enhancement I just
  14. // thought of is that we don't have to do all 30 days if we read the number of
  15. // HITs pending and just process dates that have pending HITs.
  16. //
  17. // This will only be run if someone clicks on it as it is going to be really slow if they
  18. // have a lot of HITs pending from a long time ago.
  19. //
  20. //
  21. // 08/22/2011 Coding started
  22. //
  23. // 08/23/2011 Beta version ready but I only have 1 HIT pending so no way to do
  24. //            much testing, will have to do some turking and then test
  25. //
  26. // 08/25/2011 There is the potential for a race condition: while I'm adding
  27. //            up pending HITs to reach the total that was shown on the status
  28. //            page, some of them may be approved before I've added them all up.
  29. //            So I need to add some new logic, otherwise it would be possible to
  30. //            end up in an infinite loop trying to add in some pending HITs that
  31. //            are no longer pending. I'm going to implement this by keeping track
  32. //            of the number of HITs submitted and, when reading detailed status
  33. //            pages, abort trying to add up x pending HITs if we exceed the number
  34. //            of HITs submitted for that day.
  35. //
  36. // 08/30/2011 Not working for some so changed the way getNumberOfPending() works
  37. //            and added an error message alert if this function can't find the
  38. //            number of pending HITs
  39. //
  40. // 08/31/2011 Made a test version, the problem is located in  
  41. //            process_status_page()
  42. //
  43. // 09/01/2011 Found the problem: If there was no link in a row then trying
  44. //            to work on links[0] that doesn't exist would cause the script to blow
  45. //            up. I put in a check that links has a length before trying to work with
  46. //            it.
  47. //
  48. // 10/03/2011 Modified to save a new cookie which is a history of the previous time we
  49. //            summed up the pendingEarnings and we use that to skip processing dates where
  50. //            the pendingHits hasn't changed since the last time we were invoked. This should
  51. //            minimize the times the page request exceeded error is encountered.
  52. //
  53. // 10/29/2011 Changed the expiration date of the pendingEarnings cookie to 30 days.
  54. //
  55. // 11/03/2011 Moved the setting of the pendingEarnings cookie outside the if loop to fix the
  56. //            change of 10/29.
  57. //
  58. // The normal functioning of the script is to just get the cookie
  59. // of PendingEarnings and display the link and total.
  60. //
  61. // If we are clicked on then processPendingHits is called and we will
  62. // add up the pending HITs. Note there could still be no pending HITs
  63. // the user just clicked on it anyway so we can't ignore the case of
  64. // zero HITs to process. The total is saved in a cookie so it is
  65. // available for all the times we haven't been clicked on.
  66. //
  67.  
  68. // Insert the Pending Earnings in the dashboard.
  69. // Copied from current_earnings script - Copyright (c) 2008, Mr. Berserk
  70. //
  71. // Modified to suit
  72. //
  73.  
  74. var allTds, thisTd;
  75. allTds = document.getElementsByTagName('td');
  76. for (var i = 0; i < allTds.length; i++)
  77. {
  78.    thisTd = allTds[i];
  79.    if ( thisTd.innerHTML.match(/Total Earnings/) && thisTd.className.match(/metrics\-table\-first\-value/) )
  80.    {
  81.       var row = document.createElement('tr');
  82.       row.className = "even";
  83.  
  84.  
  85.       var pendingEarningsLink = document.createElement('a');
  86.       pendingEarningsLink.href =  "https://www.mturk.com/mturk/dashboard";
  87.       pendingEarningsLink.innerHTML = "Pending HITs";
  88.       pendingEarningsLink.addEventListener('click',processPendingHits,false);
  89.       var cellLeft = document.createElement('td');
  90.       cellLeft.className = "metrics-table-first-value";
  91.       cellLeft.appendChild(pendingEarningsLink);
  92.       row.appendChild(cellLeft);
  93.          
  94.       var cellRight = document.createElement('td');  
  95.       if(getCookie("MturkPendingEarnings"))
  96.       {
  97.          var pendingEarnings = parseFloat(getCookie("MturkPendingEarnings"));
  98.          cellRight.innerHTML = "$" + pendingEarnings.toFixed(2);
  99.       }
  100.       else
  101.       {
  102.          cellRight.innerHTML = "$?.??";
  103.       }
  104.       row.appendChild(cellRight);
  105.              
  106.       thisTd.parentNode.parentNode.insertBefore(row,thisTd.parentNode.nextSibling);
  107.    }
  108. }
  109.  
  110. //
  111. // Functions
  112. //
  113.  
  114. //
  115. // User has clicked on us, so add up all the pending HITs. The first thing
  116. // we do is get a copy of the status page, this contains the summary of the
  117. // past 30 days. We scan through this looking at the Pending HITs column and
  118. // saving the dates that have pending HITs which we will subsequently use to
  119. // access the appropriate detailed status pages :)
  120. //
  121.  
  122. function processPendingHits()
  123. {
  124.    var pendingEarnings = 0;            // Dollar amount of pendingHITs
  125.    var pendingHits = getNumberOfPending();
  126.    if(pendingHits>0)   // only process pages if there is at least one pending HIT
  127.    {
  128.       var oldDatesToDo = new Array();           // this array will hold the history of the last
  129.                                                 // time we were clicked
  130.       var datesToDo = process_status_page();    // get dates that have pending HITs
  131.  
  132.       //
  133.       // Ok we have a list of dates to process and the number of pending HITS
  134.       // for that date is appended to the encoded date. To make the script faster and
  135.       // more efficient we save the array datesToDo as a cookie and compare the new values
  136.       // just retrieved from the status page to skip processing those dates where the number of
  137.       // pending HITs hasn't changed since the last time we were called. The one exception is today
  138.       // where the pending HIT count might not of changed but the hits pending might have since more
  139.       // hits could of been added. To catch this we also check the submitted HITs to the old submitted HITs
  140.       //
  141.  
  142.       if(getCookie("MturkPendingDates"))   // retrieve history if it exists
  143.       {
  144.          oldDatesToDo = getCookie("MturkPendingDates").split(",");        
  145.       }
  146.  
  147.       //
  148.       // Now we want to compare the old array with the new array and only process those dates where we don't have
  149.       // the subtotal in the old array. But if the date is today, the pending HIT count could be the same but
  150.       // but they could be different HITs because pending HITs could be added and subtracted so we have to check the
  151.       // submitted HITs to catch this condition.
  152.       //
  153.  
  154.       var subtotal = 0;                  
  155.       for(n = 0; n < datesToDo.length; n++)
  156.       {
  157.          var dateProcessed = 0;
  158.          var encodedDate = datesToDo[n].substr(0,8);
  159.          var index1 = datesToDo[n].indexOf('$');
  160.          var pendingHits = datesToDo[n].substring(8,index1); // the next part of the string up to the $
  161.                                                             // is the pending Hits total
  162.          var submittedHits = datesToDo[n].substring(index1+1);    
  163.  
  164.          for(var m = 0; m < oldDatesToDo.length; m++)            // check if we have this date in the history
  165.          {
  166.             var old_encodedDate = oldDatesToDo[m].substr(0,8);
  167.             if(encodedDate == old_encodedDate)
  168.             {
  169.                index1 = oldDatesToDo[m].indexOf('$');
  170.                var old_pendingHits = oldDatesToDo[m].substring(8,index1); // the next part of the string up to the $
  171.                                                                           // is the pending Hits total
  172.                var index2 = oldDatesToDo[m].indexOf('%');                 // the next part of the string up to the %
  173.                                                                           //  is the submitted HITs total
  174.                var old_submittedHits = oldDatesToDo[m].substring(index1+1,index2);
  175.  
  176.                var old_subtotal = oldDatesToDo[m].substring(index2+1);   // the rest of the string is the subtotal  
  177.  
  178.                // So since we have this date in the history we check if the pending HITs and the submitted HITs are the
  179.                // same and if they are we don't bother to process this date by NOT adding it to the processDates array
  180.                // and we add the existing subtotal for that date into the pendingHITs dollar amount right now.
  181.  
  182.                if (submittedHits == old_submittedHits && pendingHits == old_pendingHits)
  183.                {
  184.                  
  185.                   pendingEarnings += parseFloat(old_subtotal);           // use the old value since it is still good
  186.                   datesToDo[n] = datesToDo[n] + '%' + old_subtotal;      // add old subtotal into the new array
  187.                   dateProcessed = 1;
  188.                   break;                                                 // found the date so exit loop        
  189.                }                  
  190.             }  
  191.          }  
  192.          if(dateProcessed < 1)                                           // if the date wasn't in the history or the
  193.          {                                                               // pending HITs has changed process the date
  194.             subtotal = process_detailed_status_pages(encodedDate, pendingHits, submittedHits);
  195.             datesToDo[n] = datesToDo[n] + '%' + subtotal;
  196.             pendingEarnings += subtotal;
  197.          }
  198.       }      
  199.       // now overwrite the oldDatesToDo with the new one if the cookie already existed else create the cookie
  200.       setCookie("MturkPendingDates",datesToDo.join(","),1);   // Save the array datesToDo as a cookie by converting
  201.                                                               //  it to a string first
  202.  
  203.    }
  204.       setCookie("MturkPendingEarnings",pendingEarnings,30);    // save total in cookie - move out here so we set it
  205.                                                                // to zero if no HITs pending
  206. }
  207.  
  208.  
  209. //
  210. // Get total pending HITs
  211. //
  212. // As of now there are 8 'metrics-table's on the dashboard
  213. // but the last two are subsets of table 6 which has 26 td's
  214. // but again we have subsets within subsets so that was the confusion
  215. // So when you get the td's for table 5 you are also getting the td's
  216. // for table 7 which is what we want. The confusion is when I try to
  217. // match on innerHTML for ... Pending, I get a match for the superset
  218. // td's not on the individual td. So td 14 contains ... Pending but it
  219. // also contains all the other td's that are part of the second column
  220. //
  221. // To handle the recursive tables I'm now checking that the td is
  222. // numeric, if it isn't we continue to scan tds.
  223. //
  224. function getNumberOfPending()
  225. {
  226.    var tables = document.getElementsByClassName('metrics-table');
  227.    for (var m = 0; m <tables.length; m++)
  228.    {
  229.       var table_data = tables[m].getElementsByTagName('td');  // 26 data
  230.       for (var n = 0; n <table_data.length; n++)
  231.       {
  232.          if(table_data[n].innerHTML.match('... Pending'))
  233.          {                                                
  234.             if(isNumber(table_data[n+1].innerHTML))
  235.             {
  236.                return table_data[n+1].innerHTML;
  237.             }        
  238.          }
  239.       }
  240.    }
  241.    alert("Script Bug: Can't find how many pending HITs you have");
  242.    return -1;           // didn't find it - This is a bug exit!
  243. }
  244.  
  245. //
  246. // Process the status page by getting all those dates that have pending HITs
  247. //
  248. // The first status page contains 30 days worth of HITs so there is no need to
  249. // check the second page as there should be no pending HITS on that page
  250. //
  251. // Note: If the person is a total newbie then maybe the status page doesn't even
  252. // have 30 days worth of data so don't hard code the 30 bozo :)
  253. //
  254. //
  255. // We need to process the DOM in a recursive manner so that I can associate the
  256. // correct date with the correct pending HIT count. The days listed aren't necessarly
  257. // in sequential order if he took some days off there will be missing days.
  258. //
  259. // This main function just grabs the status page and then calls the function
  260. // pending_dates to do the actual work
  261. //
  262.  
  263. function process_status_page()
  264. {
  265.    // use XMLHttpRequest to fetch the entire page, use async mode for now because I understand it
  266.    var page = getHTTPObject();
  267.    page.open("GET",'https://www.mturk.com/mturk/status',false);      
  268.    page.send(null);
  269.    return pending_dates(page.responseText);
  270. }
  271.  
  272. //
  273. // First of all we have to turn the grabbed status page back into a DOM
  274. // object so we can process it with javascript. We do this with the
  275. // create div trick.
  276. //
  277. // Now get all the tables
  278. // Some of these tables won't be what we are looking for but there is no
  279. // way to distingush at the table level, we have to look at the table data to
  280. // know if this table is of interest to us.
  281. //
  282. // There is a problem that the table we are interested in is embedded in another
  283. // table so we are processing the table we want twice, how to avoid this?
  284. //
  285.  
  286. function pending_dates(page_text)
  287. {
  288.    var page_html = document.createElement('div');
  289.    page_html.innerHTML = page_text;
  290.  
  291.    var datesIndex = 0;
  292.    var activeDates = new Array();
  293.    var tableProcessed = 0;
  294.  
  295.    var tables = page_html.getElementsByTagName('table');
  296.    for (var m = 0; m < tables.length; m++) // process each table
  297.    {
  298.       var table_rows = tables[m].getElementsByTagName('tr');  
  299.       for (var n = 0; n < table_rows.length; n++) // process each row
  300.       {
  301.          //
  302.          // if we are in a row we are interested in, its first td will contain a link
  303.          // to a detailed status page, we look for a match on that link
  304.          //
  305.          var links = table_rows[n].getElementsByTagName('a');
  306.          if(links.length>0 && links[0].href.substr(0,40)=='https://www.mturk.com/mturk/statusdetail')
  307.          {
  308.             //
  309.             // OK we found an interesting row, now does this date have any pending HITs?
  310.             //
  311.             tableProcessed = 1; // Indicate that we have processed the table
  312.             var table_data = table_rows[n].getElementsByClassName('statusPendingColumnValue');
  313.             var pendingHits = table_data[0].innerHTML;  // This is a number, if greater than zero we have pending HITs
  314.             table_data = table_rows[n].getElementsByClassName('statusSubmittedColumnValue');
  315.             var submittedHits = table_data[0].innerHTML; // Number of HITs submitted for this date
  316.             if(pendingHits > 0) //then save the date in the activeDates array
  317.             {
  318.                var encodedDate = links[0].href.substr(links[0].href.search(/Date=/)+5,8);
  319.                // as a hack I'm appending the number of pending HITs to the encoded date so we
  320.                // can return both pieces of data through the one dimensional array
  321.                // now I want to add in the number of HITs submitted also but I have to be able
  322.                // to parse the string later to distingush the two numbers
  323.                activeDates[datesIndex++] = encodedDate + pendingHits + '$' + submittedHits;        
  324.             }  
  325.          }
  326.       }
  327.       if(tableProcessed>0) return activeDates;  // bail if we already processed the table, don't do it again!
  328.    }  
  329.    alert("Script Bug: Couldn't find any dates to process");
  330. }
  331.  
  332. //
  333. // Process the detailed status pages associated with this date until we have
  334. // found all the pending HITs for this date
  335. //
  336.  
  337. function process_detailed_status_pages(encodedDate, pendingHits, submittedHits)
  338. {
  339.    var subtotal = 0;
  340.    var pagenum = 1;          // detailed status page number, we start with page 1
  341.    while (pendingHits > 0)
  342.    {
  343.       // use XMLHttpRequest to fetch the entire page, use async mode for now because I understand it
  344.       var page = getHTTPObject();
  345.       link = "https://www.mturk.com/mturk/statusdetail?sortType=All&pageNumber=" + pagenum + "&encodedDate=" + encodedDate;
  346.       page.open("GET",link,false);      
  347.       page.send(null);
  348.       var page_html = document.createElement('div');
  349.       page_html.innerHTML = page.responseText;
  350.       var amounts = page_html.getElementsByClassName('statusdetailAmountColumnValue');
  351.       var statuses = page_html.getElementsByClassName('statusdetailStatusColumnValue');
  352.       for(var k = 0; k < amounts.length; k++)
  353.       {
  354.          if(statuses[k].innerHTML == 'Pending Approval')
  355.          {
  356.             pendingHits--;
  357.             index = amounts[k].innerHTML.indexOf('$');
  358.             subtotal += parseFloat(amounts[k].innerHTML.substring(index+1));
  359.          }
  360.       }
  361.       submittedHits -= 25;                       // 25 HITs to a page
  362.       if (submittedHits <= 0) return subtotal;   // We have done all the HITs for this date
  363.                                                  // But the pendingHits count isn't zero
  364.                                                  // So we must of encountered a race condition
  365.                                                  // exit with the subtotal    
  366.       pagenum++; // go do another page if necessary
  367.    }
  368.    return subtotal; // This is the dollar amount of pending HITs for this date
  369. }
  370.  
  371. //
  372. // XMLHttpRequest wrapper from web
  373. //
  374.  
  375. function getHTTPObject()  
  376. {
  377.    if (typeof XMLHttpRequest != 'undefined')
  378.    {
  379.       return new XMLHttpRequest();
  380.    }
  381.    try
  382.    {
  383.       return new ActiveXObject("Msxml2.XMLHTTP");
  384.    }
  385.    catch (e)
  386.    {
  387.       try
  388.       {
  389.          return new ActiveXObject("Microsoft.XMLHTTP");
  390.       }
  391.       catch (e) {}
  392.    }
  393.    return false;
  394. }
  395.  
  396. //
  397. // Is the variable a number or a string that parses to a number?
  398. //
  399.  
  400. function isNumber (o)
  401. {
  402.    return ! isNaN(o-0);
  403. }
  404.  
  405.  
  406. //
  407. //  Cookie functions copied from http://www.w3schools.com/JS/js_cookies.asp
  408. //
  409.  
  410. function setCookie(cookie_name,value,exdays)
  411. {
  412.    var exdate = new Date();
  413.    exdate.setDate(exdate.getDate() + exdays);
  414.    var cookie_value = escape(value) + ((exdays==null) ? "" : "; expires="+exdate.toUTCString());
  415.    document.cookie = cookie_name + "=" + cookie_value;
  416. }
  417.  
  418.  
  419. function getCookie(cookie_name)
  420. {
  421.    var i,x,y
  422.    var ARRcookies = document.cookie.split(";");
  423.    for (i=0; i<ARRcookies.length; i++)
  424.    {
  425.       x = ARRcookies[i].substr(0,ARRcookies[i].indexOf("="));
  426.       y = ARRcookies[i].substr(ARRcookies[i].indexOf("=")+1);
  427.       x = x.replace(/^\s+|\s+$/g,"");
  428.       if (x == cookie_name)
  429.       {
  430.          return unescape(y);
  431.       }
  432.    }
  433. }
// Add Comment
// Please, Sign In to add comment