Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // ==UserScript==
- // @name Pending Earnings
- // @namespace http://bluesky.software.com/turkscripts
- // @description Adds a pending earnings item to mturk dashboard
- // @include https://www.mturk.com/mturk/dashboard
- // ==/UserScript==
- //
- // We are on the dashboard page. We want to go to the status_detail
- // pages for the last 30 days and total the dollar amount of the
- // HITs still pending and add it to the dashboard page like the today's projected earnings script
- // does. We will use the XMLHttpRequest Object to get the pages and then
- // process them one by one until we have done them all. An enhancement I just
- // thought of is that we don't have to do all 30 days if we read the number of
- // HITs pending and just process dates that have pending HITs.
- //
- // This will only be run if someone clicks on it as it is going to be really slow if they
- // have a lot of HITs pending from a long time ago.
- //
- //
- // 08/22/2011 Coding started
- //
- // 08/23/2011 Beta version ready but I only have 1 HIT pending so no way to do
- // much testing, will have to do some turking and then test
- //
- // 08/25/2011 There is the potential for a race condition: while I'm
- // adding up pending HITs to reach the total that was present on the
- // status page, some of them may be approved before I've added them all up.
- // So I need to add some new logic otherwise it would be possible to
- // end up in an infinite loop trying to add in some pending HITs that
- // are no longer pending. I'm going to implement this by keeping track
- // of the number of HITs submitted and when reading detailed status
- // pages abort trying to add up x pending HITs if we exceed the number
- // of HITs submitted for that day.
- //
- // 08/30/2011 Not working for some so changed the way getNumberOfPending() works
- // and added an error message alert if this function can't find the
- // number of pending HITs
- //
- // 08/31/2011 Made a test version, the problem is located in
- // process_status_page()
- //
- // 09/01/2011 Found the problem: If there was no link in a row then trying
- // to work on links[0] that doesn't exist would cause the script to blow
- // up, I put in a check that links has a length before trying to work with
- // it.
- //
- // 10/03/2011 Modified to save a new cookie which is a history of the previous time we
- // summed up the pendingEarnings and we use that to skip processing dates where
- // the pendingHits hasn't changed since the last time we were invoked. This should
- // minimize the times the page request exceeded error is encountered.
- //
- // 10/29/2011 Changed the expiration date of the pendingEarnings cookie to 30 days.
- //
- // 11/03/2011 Moved the setting of the pendingEarnings cookie outside the if loop to fix the
- // change of 10/29.
- //
- // The normal functioning of the script is to just get the cookie
- // of PendingEarnings and display the link and total.
- //
- // If we are clicked on then processPendingHits is called and we will
- // add up the pending HITs. Note there could still be no pending HITs
- // the user just clicked on it anyway so we can't ignore the case of
- // zero HITs to process. The total is saved in a cookie so it is
- // available for all the times we haven't been clicked on.
- //
- // Insert the Pending Earnings in the dashboard.
- // Copied from current_earnings script - Copyright (c) 2008, Mr. Berserk
- //
- // Modified to suit
- //
//
// Insert a "Pending HITs" row into the dashboard, directly after the row
// whose first cell reads "Total Earnings".
//
// The left cell is a link back to the dashboard whose click handler is
// processPendingHits. The right cell shows the total last saved in the
// MturkPendingEarnings cookie, or "$?.??" when there is no usable value.
//
var allTds, thisTd;
allTds = document.getElementsByTagName('td');
for (var i = 0; i < allTds.length; i++)
{
    thisTd = allTds[i];
    if ( thisTd.innerHTML.match(/Total Earnings/) && thisTd.className.match(/metrics\-table\-first\-value/) )
    {
        var row = document.createElement('tr');
        row.className = "even";

        var pendingEarningsLink = document.createElement('a');
        pendingEarningsLink.href = "https://www.mturk.com/mturk/dashboard";
        pendingEarningsLink.innerHTML = "Pending HITs";
        pendingEarningsLink.addEventListener('click',processPendingHits,false);

        var cellLeft = document.createElement('td');
        cellLeft.className = "metrics-table-first-value";
        cellLeft.appendChild(pendingEarningsLink);
        row.appendChild(cellLeft);

        var cellRight = document.createElement('td');
        // Read the cookie once. A missing, empty or corrupt value parses to NaN
        // and is shown as unknown rather than as "$NaN".
        var pendingEarnings = parseFloat(getCookie("MturkPendingEarnings"));
        if (isNaN(pendingEarnings))
        {
            cellRight.innerHTML = "$?.??";
        }
        else
        {
            cellRight.innerHTML = "$" + pendingEarnings.toFixed(2);
        }
        row.appendChild(cellRight);

        // Place the new row immediately after the "Total Earnings" row.
        thisTd.parentNode.parentNode.insertBefore(row,thisTd.parentNode.nextSibling);
    }
}
- //
- // Functions
- //
- //
- // User has clicked on us, so add up all the pending HITs. The first thing
- // we do is get a copy of the status page, this contains the summary of the
- // past 30 days. We scan through this looking at the Pending HITs column and
- // saving the dates that have pending HITs which we will subsequently use to
- // access the appropriate detailed status pages :)
- //
//
// Look through the history saved by the previous run for an entry that has
// the same date, the same pending HIT count and the same submitted HIT count.
//
// History entries have the form  <8 char date><pending>$<submitted>%<subtotal>
//
// Returns the cached subtotal (as the string stored in the history) when such
// an entry exists and its subtotal parses as a number, otherwise null.
//
function findCachedSubtotal(oldDatesToDo, encodedDate, pendingHits, submittedHits)
{
    for (var m = 0; m < oldDatesToDo.length; m++)
    {
        var oldEntry = oldDatesToDo[m];
        if (oldEntry.substr(0,8) != encodedDate)
        {
            continue;
        }
        var dollarAt = oldEntry.indexOf('$');
        var percentAt = oldEntry.indexOf('%');
        if (dollarAt < 0 || percentAt < dollarAt)
        {
            continue; // malformed history entry, can't trust it
        }
        var old_pendingHits = oldEntry.substring(8,dollarAt);
        var old_submittedHits = oldEntry.substring(dollarAt+1,percentAt);
        var old_subtotal = oldEntry.substring(percentAt+1);
        if (submittedHits == old_submittedHits && pendingHits == old_pendingHits && !isNaN(parseFloat(old_subtotal)))
        {
            return old_subtotal;
        }
    }
    return null;
}
//
// Click handler for the "Pending HITs" link: total the dollar amount of all
// pending HITs and save it in the MturkPendingEarnings cookie (30 days).
//
// The per-date results are saved in the MturkPendingDates cookie (1 day) so
// that the next run can skip every date whose pending and submitted counts
// have not changed and reuse the subtotal stored for it.
//
// If the pending count can't be read from the dashboard, or the status page
// can't be parsed, both cookies are left exactly as they were so a failed run
// never replaces a good total.
//
function processPendingHits()
{
    var pendingEarnings = 0; // Dollar amount of pending HITs
    var totalPending = getNumberOfPending();
    if (totalPending < 0)
    {
        return; // getNumberOfPending() already reported the problem
    }
    if (totalPending > 0) // only process pages if there is at least one pending HIT
    {
        var datesToDo = process_status_page(); // dates that have pending HITs
        if (!datesToDo)
        {
            return; // status page couldn't be processed, keep the old cookies
        }

        var oldDatesToDo = []; // history of the last time we were clicked
        var history = getCookie("MturkPendingDates");
        if (history)
        {
            oldDatesToDo = history.split(",");
        }

        for (var n = 0; n < datesToDo.length; n++)
        {
            // Each entry is <8 char date><pending>$<submitted>
            var entry = datesToDo[n];
            var encodedDate = entry.substr(0,8);
            var dollarAt = entry.indexOf('$');
            var pendingHits = entry.substring(8,dollarAt);
            var submittedHits = entry.substring(dollarAt+1);

            var cached = findCachedSubtotal(oldDatesToDo, encodedDate, pendingHits, submittedHits);
            if (cached !== null)
            {
                // Counts are unchanged since the last run, the old value is still good
                pendingEarnings += parseFloat(cached);
                datesToDo[n] = entry + '%' + cached;
            }
            else
            {
                // Date is not in the history or its counts changed: process it
                var subtotal = process_detailed_status_pages(encodedDate, pendingHits, submittedHits);
                datesToDo[n] = entry + '%' + subtotal;
                pendingEarnings += subtotal;
            }
        }
        // Replace the history with the new one (the array is saved as a string)
        setCookie("MturkPendingDates",datesToDo.join(","),1);
    }
    // Saved outside the if so the total becomes zero when no HITs are pending
    setCookie("MturkPendingEarnings",pendingEarnings,30);
}
- //
- // Get total pending HITs
- //
- // As of now there are 8 'metrics-table's on the dashboard
- // but the last two are subsets of table 6 which has 26 td's
- // but again we have subsets within subsets so that was the confusion
- // So when you get the td's for table 5 you are also getting the td's
- // for table 7 which is what we want. The confusion is when I try to
- // match on innerHTML for ... Pending, I get a match for the superset
- // td's not on the individual td. So td 14 contains ... Pending but it
- // also contains all the other td's that are part of the second column
- //
- // To handle the recursive tables I'm now checking that the td is
- // numeric, if it isn't we continue to scan tds.
- //
//
// Read the total number of pending HITs from the dashboard.
//
// Scans every td of every 'metrics-table' for a cell whose innerHTML matches
// "... Pending" and returns the innerHTML of the cell that follows it, but
// only when that following cell is numeric. Outer cells that merely contain
// the label because of nested tables are followed by non-numeric cells, so
// they are skipped and the scan continues.
//
// Returns the count as the cell's innerHTML string, or -1 (after an alert)
// when no such pair of cells is found.
//
function getNumberOfPending()
{
    // The dots are regex wildcards, exactly as they were when the pattern
    // was passed to match() as a string.
    var pendingLabel = /... Pending/;
    var tables = document.getElementsByClassName('metrics-table');
    for (var m = 0; m < tables.length; m++)
    {
        var table_data = tables[m].getElementsByTagName('td');
        // Stop one short of the end: the count lives in the cell AFTER the
        // label, so the last cell can never be a usable label.
        for (var n = 0; n + 1 < table_data.length; n++)
        {
            if (table_data[n].innerHTML.match(pendingLabel) && isNumber(table_data[n+1].innerHTML))
            {
                return table_data[n+1].innerHTML;
            }
        }
    }
    alert("Script Bug: Can't find how many pending HITs you have");
    return -1; // didn't find it - This is a bug exit!
}
- //
- // Process the status page by getting all those dates that have pending HITs
- //
- // The first status page contains 30 days worth of HITs so there is no need to
- // check the second page as there should be no pending HITS on that page
- //
- // Note: If the person is a total newbie then maybe the status page doesn't even
- // have 30 days worth of data so don't hard code the 30 bozo :)
- //
- //
- // We need to process the DOM in a recursive manner so that I can associate the
- // correct date with the correct pending HIT count. The days listed aren't necessarily
- // in sequential order; if he took some days off there will be missing days.
- //
- // This main function just grabs the status page and then calls the function
- // pending_dates to do the actual work
- //
//
// Fetch the status page and return the dates that have pending HITs.
//
// The page is fetched with a SYNCHRONOUS XMLHttpRequest (third argument of
// open() is false), so this call blocks until the response has arrived.
// The response text is handed to pending_dates() and its result is returned.
//
// Returns undefined (after an alert) when no request object can be created
// or the server doesn't answer with HTTP 200, so an error page is never
// parsed as if it were the status page.
//
function process_status_page()
{
    var page = getHTTPObject();
    if (!page)
    {
        alert("Script Bug: Can't create a request for the status page");
        return;
    }
    page.open("GET",'https://www.mturk.com/mturk/status',false);
    page.send(null);
    if (page.status != 200)
    {
        alert("Couldn't get the status page (HTTP status " + page.status + ")");
        return;
    }
    return pending_dates(page.responseText);
}
- //
- // First of all we have to turn the grabbed status page back into a DOM
- // object so we can process it with javascript. We do this with the
- // create div trick.
- //
- // Now get all the tables
- // Some of these tables won't be what we are looking for but there is no
- // way to distinguish at the table level, we have to look at the table data to
- // know if this table is of interest to us.
- //
- // There is a problem that the table we are interested in is embedded in another
- // table so we are processing the table we want twice, how to avoid this?
- //
//
// Extract the dates that have pending HITs from the text of the status page.
//
// page_text is loaded into a detached div so it can be walked as a DOM.
// A row is of interest when its first link points at a statusdetail page.
// For each such row with a pending count above zero one string is produced:
//
//     <8 char encoded date><pending HITs>$<submitted HITs>
//
// Scanning stops after the first table that contains at least one such row,
// because a table's rows include the rows of any table nested inside it.
//
// Returns the array of strings (possibly empty). When no table contains a
// statusdetail row an alert is shown and undefined is returned.
//
function pending_dates(page_text)
{
    var page_html = document.createElement('div');
    page_html.innerHTML = page_text;

    var activeDates = [];
    var tableProcessed = 0;
    var tables = page_html.getElementsByTagName('table');
    for (var m = 0; m < tables.length; m++) // process each table
    {
        var table_rows = tables[m].getElementsByTagName('tr');
        for (var n = 0; n < table_rows.length; n++) // process each row
        {
            // Rows without a link (headers, spacers) must be skipped before
            // links[0] is touched.
            var links = table_rows[n].getElementsByTagName('a');
            if (links.length == 0 || links[0].href.substr(0,40) != 'https://www.mturk.com/mturk/statusdetail')
            {
                continue;
            }
            tableProcessed = 1; // this table is the one holding the dates

            var pendingCells = table_rows[n].getElementsByClassName('statusPendingColumnValue');
            var submittedCells = table_rows[n].getElementsByClassName('statusSubmittedColumnValue');
            var dateAt = links[0].href.search(/Date=/);
            if (pendingCells.length == 0 || submittedCells.length == 0 || dateAt < 0)
            {
                continue; // row doesn't have the expected layout, nothing to read
            }

            var pendingHits = pendingCells[0].innerHTML;     // pending HITs for this date
            var submittedHits = submittedCells[0].innerHTML; // submitted HITs for this date
            if (pendingHits > 0)
            {
                // Both counts ride along with the date in one string; the '$'
                // separates them so they can be split apart again later.
                var encodedDate = links[0].href.substr(dateAt+5,8);
                activeDates.push(encodedDate + pendingHits + '$' + submittedHits);
            }
        }
        if (tableProcessed > 0) return activeDates; // don't process nested copies again
    }
    alert("Script Bug: Couldn't find any dates to process");
    return undefined;
}
- //
- // Process the detailed status pages associated with this date until we have
- // found all the pending HITs for this date
- //
//
// Add up the dollar amount of the pending HITs for one date.
//
//   encodedDate   - 8 character date as used in the statusdetail URL
//   pendingHits   - number of pending HITs the status page reported for the date
//   submittedHits - number of HITs submitted on that date
//
// Detailed status pages are fetched one after the other with a SYNCHRONOUS
// XMLHttpRequest, starting at page 1, until pendingHits rows with the status
// 'Pending Approval' have been seen. Each page is counted as 25 submitted
// HITs; once the submitted count is used up the loop stops even if pending
// HITs are still outstanding, because they must have been approved in the
// meantime (race condition) and would otherwise be looked for forever.
//
// Returns the dollar subtotal as a number.
//
function process_detailed_status_pages(encodedDate, pendingHits, submittedHits)
{
    var subtotal = 0;
    var pagenum = 1; // detailed status page number, we start with page 1
    var pendingLeft = Number(pendingHits);
    var submittedLeft = Number(submittedHits);
    while (pendingLeft > 0)
    {
        var page = getHTTPObject();
        var link = "https://www.mturk.com/mturk/statusdetail?sortType=All&pageNumber=" + pagenum + "&encodedDate=" + encodedDate;
        page.open("GET",link,false);
        page.send(null);

        var page_html = document.createElement('div');
        page_html.innerHTML = page.responseText;
        var amounts = page_html.getElementsByClassName('statusdetailAmountColumnValue');
        var statuses = page_html.getElementsByClassName('statusdetailStatusColumnValue');
        // Walk the two columns in step; stop at the shorter one so a page with
        // mismatched columns can't index past the end.
        for (var k = 0; k < amounts.length && k < statuses.length; k++)
        {
            if (statuses[k].innerHTML == 'Pending Approval')
            {
                pendingLeft--;
                var amountText = amounts[k].innerHTML;
                var amount = parseFloat(amountText.substring(amountText.indexOf('$')+1));
                if (!isNaN(amount)) // an unreadable amount must not turn the total into NaN
                {
                    subtotal += amount;
                }
            }
        }

        submittedLeft -= 25; // 25 HITs to a page
        if (submittedLeft <= 0) return subtotal; // all HITs for this date have been seen
        pagenum++; // go do another page if necessary
    }
    return subtotal; // This is the dollar amount of pending HITs for this date
}
- //
- // XMLHttpRequest wrapper from web
- //
//
// Create a request object for fetching pages.
//
// Uses the native XMLHttpRequest when the browser has one, otherwise tries
// the two ActiveX program ids in turn. Returns false when none of them can
// be created.
//
function getHTTPObject()
{
    if (typeof XMLHttpRequest != 'undefined')
    {
        return new XMLHttpRequest();
    }

    var progIds = ["Msxml2.XMLHTTP", "Microsoft.XMLHTTP"];
    for (var idx = 0; idx < progIds.length; idx++)
    {
        try
        {
            return new ActiveXObject(progIds[idx]);
        }
        catch (unavailable)
        {
            // this id isn't available here, fall through to the next one
        }
    }
    return false;
}
- //
- // Is the variable a number or a string that parses to a number?
- //
//
// Is the variable a number or a string that parses to a number?
//
// Only real numbers (other than NaN) and strings holding a numeric value
// count. Empty or whitespace-only strings, null, booleans and other objects
// are NOT numbers, even though subtracting zero from them doesn't give NaN.
//
function isNumber (o)
{
    if (typeof o == 'number')
    {
        return ! isNaN(o);
    }
    if (typeof o == 'string')
    {
        // /\S/ rejects "" and "   ", which would otherwise coerce to 0
        return /\S/.test(o) && ! isNaN(o-0);
    }
    return false;
}
- //
- // Cookie functions copied from http://www.w3schools.com/JS/js_cookies.asp
- //
//
// Store a cookie.
//
//   cookie_name - name of the cookie
//   value       - value to store, saved escape()d
//   exdays      - days until the cookie expires; when null or undefined no
//                 expires attribute is written
//
function setCookie(cookie_name,value,exdays)
{
    var pieces = [cookie_name + "=" + escape(value)];
    if (exdays != null)
    {
        var expiry = new Date();
        expiry.setDate(expiry.getDate() + exdays);
        pieces.push("expires=" + expiry.toUTCString());
    }
    document.cookie = pieces.join("; ");
}
//
// Read a cookie.
//
// Walks the name=value pairs of document.cookie and returns the unescape()d
// value of the first pair whose name, with surrounding whitespace removed,
// equals cookie_name. Returns undefined when there is no such cookie.
//
function getCookie(cookie_name)
{
    var pairs = document.cookie.split(";");
    for (var idx = 0; idx < pairs.length; idx++)
    {
        var pair = pairs[idx];
        var eqAt = pair.indexOf("=");
        // Only the first '=' separates name from value; the value may contain more
        var key = pair.substring(0, eqAt).replace(/^\s+|\s+$/g,"");
        if (key == cookie_name)
        {
            return unescape(pair.substring(eqAt + 1));
        }
    }
}
Add Comment
Please, Sign In to add comment