Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// ==UserScript==
// @name RecordingAdcogov
// @namespace http://tampermonkey.net/
// @version 1.0
// @description Scraper for http://recording.adcogov.org
// @author IB
// @match http://recording.adcogov.org/LandmarkWeb/search/index?theme=.blue&section=*
// @grant none
// @require https://cdnjs.cloudflare.com/ajax/libs/notify/0.4.2/notify.min.js
// @require https://cdnjs.cloudflare.com/ajax/libs/jszip/3.1.3/jszip.min.js
// @require https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/1.3.3/FileSaver.min.js
// @require https://cdnjs.cloudflare.com/ajax/libs/jszip-utils/0.0.2/jszip-utils.min.js
// ==/UserScript==
/**
 * Scraper for the search results page of http://recording.adcogov.org.
 *
 * Flow: `start` waits for the results table, `scrapeResultPage` opens a record,
 * `scrapeSingleRecord` reads the record's labelled fields and steps through the
 * records with the "next" navigator, and once the navigator is no longer
 * visible the collected data is exported with `toCsv` and `savePdfs`.
 *
 * Relies on globals that are not defined in this file: `$` (jQuery, with the
 * notify plugin), `JSZip`, `JSZipUtils` and `saveAs`.
 */
const App = (function () {
    /** Records scraped so far, in the order they were visited. */
    const data = [];

    /** Instrument number of the last scraped record; a change means the next record has loaded. */
    let lastRecordInstrumentNumber;

    // NOTE(review): the original code clicked rows[2] while only checking
    // `rows.length > 1`. The index is kept as-is; confirm that skipping the
    // first two matched rows is intended.
    const FIRST_ROW_INDEX = 2;
    const CLICK_CELL_INDEX = 3;

    const PDF_BASE_URL = 'http://recording.adcogov.org/LandmarkWeb//Document/GetDocumentForPrint/?request=';

    /** [property name on the scraped item, `for` attribute of the page label] pairs. */
    const RECORD_FIELDS = [
        ['instrument', 'Instrument # '],
        ['bookPage', 'Book/Page '],
        ['recordDate', 'Record Date '],
        ['bookType', 'Book Type '],
        ['docType', 'Doc Type '],
        ['numberOfPages', 'Number of Pages '],
        ['tdOfPages', 'TD # of Pages '],
        ['grantor', 'Grantor '],
        ['grantee', 'Grantee '],
        ['salesPrice', 'Sales Price '],
        ['docLinks', 'Doc Links '],
        ['legal', 'Legal '],
        ['docLegals', 'Doc. Legals '],
    ];

    /** Shows a notification using the same arguments the script has always passed to `$.notify`. */
    function notify(message, className) {
        $.notify(message, className || 'success', {showDuration: 10000, position: 'center'});
    }

    /** Formats one value as a quoted CSV cell, doubling embedded quotes. */
    function csvCell(value) {
        const text = value === undefined || value === null ? '' : String(value);
        return '"' + text.trim().replace(/"/g, '""') + '"';
    }

    return {
        /**
         * Polls `condition` every 100ms; once it returns a truthy value, logs
         * `log`, stops polling and calls `callback(parameters)`.
         *
         * @param {function(): *} condition - polled predicate.
         * @param {string} log - message logged when the condition is met.
         * @param {function(*): void} callback - invoked once with `parameters`.
         * @param {*} parameters - single argument forwarded to `callback`.
         * @returns {*} the interval id, so a caller may cancel the wait.
         */
        waitFor: function (condition, log, callback, parameters) {
            console.log('Entering in waitfor');
            const checkExist = setInterval(function () {
                if (!condition()) {
                    return;
                }
                console.log(log);
                // Stop polling before running the callback so it fires exactly once.
                clearInterval(checkExist);
                callback(parameters);
            }, 100);
            return checkExist;
        },

        /** Entry point: waits until the results table has rows, then scrapes the page. */
        start: function () {
            const that = this;
            that.waitFor(
                function () {
                    return $('#resultsTable > tbody').children().length;
                },
                'Results table was filled',
                that.scrapeResultPage,
                that
            );
        },

        /**
         * Returns the result rows of the table.
         *
         * The original selector `tr[role='row']([class=..],[class=..])` is not
         * valid selector syntax; it is spelled out here as a selector list with
         * the same exact-class matches.
         *
         * @returns {*} jQuery collection of matching rows.
         */
        getRowsElementsForClick: function () {
            return $("tr[role='row'][class='result odd'], tr[role='row'][class='even result']");
        },

        /**
         * Asks the user for confirmation, opens a record from the results table
         * and starts scraping once the record details are loaded.
         *
         * @param {Object} obj - the App object (passed explicitly because this
         *     function is invoked as a plain callback by `waitFor`).
         */
        scrapeResultPage: function (obj) {
            const rows = obj.getRowsElementsForClick();
            if (!confirm('Do you want to scrape pages + pdfs?')) {
                return;
            }
            const row = rows[FIRST_ROW_INDEX];
            const cell = row && row.children[CLICK_CELL_INDEX];
            if (!cell) {
                // Previously a table with too few rows threw a TypeError here.
                alert('Result table is empty');
                return;
            }
            cell.click();
            obj.waitFor(
                function () {
                    return $("label[for='Instrument # ']").length;
                },
                'Label was loaded',
                obj.scrapeSingleRecord,
                obj
            );
        },

        /**
         * Scrapes the currently displayed record, then either moves on to the
         * next record or, when there is no visible "next" navigator, exports
         * everything collected so far.
         *
         * @param {Object} obj - the App object.
         */
        scrapeSingleRecord: function (obj) {
            const item = {};
            RECORD_FIELDS.forEach(function (field) {
                item[field[0]] = obj.getTextOfLabelNode(field[1]);
            });
            notify('Scraping: ' + item.instrument);
            lastRecordInstrumentNumber = item.instrument;
            item.pdfUrl = obj.getPDFUrl();
            data.push(item);

            const nextNavigator = $('#directNavNext');
            if (!nextNavigator.is(':visible')) {
                notify('Saving csv . . . ');
                obj.toCsv(data);
                notify('Downloading pdfs . . . Please, wait.');
                obj.savePdfs(data);
                // Last record: do not start another wait, it could never finish
                // and would keep polling every 100ms forever.
                return;
            }

            nextNavigator.click();
            // The next record is considered loaded once the instrument number changes.
            obj.waitFor(
                function () {
                    return obj.getTextOfLabelNode('Instrument # ') !== lastRecordInstrumentNumber;
                },
                'Fetching the next record...',
                obj.scrapeSingleRecord,
                obj
            );
        },

        /**
         * Reads the trimmed text of the second child of the grandparent of the
         * label whose `for` attribute equals `forLabel`.
         *
         * @param {string} forLabel - value of the label's `for` attribute.
         * @returns {string} trimmed text ('' when nothing matches).
         */
        getTextOfLabelNode: function (forLabel) {
            const container = $(`label[for='${forLabel}']`).parent().parent();
            return $(container.children()[1]).text().trim();
        },

        /**
         * Downloads the PDF of every item and saves them together as
         * `documents.zip`. A PDF that fails to download is logged and skipped so
         * the remaining documents are still saved.
         *
         * @param {Array<Object>} array - items with `pdfUrl` and `instrument`.
         * @returns {Promise<void>} settles once the zip was handed to `saveAs`
         *     (or building it failed; that failure is logged).
         */
        savePdfs: function (array) {
            const zip = new JSZip();
            let downloaded = 0;

            const download = function (item) {
                const fileName = item.instrument + '.pdf';
                return new Promise(function (resolve) {
                    const skip = function (err) {
                        console.error('Exception in pdf downloading ' + fileName, err);
                        notify('Failed to download pdf: ' + fileName, 'error');
                        // Resolve anyway: one broken document must not block the zip.
                        resolve();
                    };
                    try {
                        JSZipUtils.getBinaryContent(item.pdfUrl, function (err, content) {
                            if (err) {
                                // Throwing here would escape the surrounding try/catch
                                // (the callback runs later) and leave the promise pending.
                                skip(err);
                                return;
                            }
                            try {
                                zip.file(fileName, content, {binary: true});
                            } catch (zipError) {
                                skip(zipError);
                                return;
                            }
                            downloaded++;
                            notify('Downloaded pdf: ' + fileName + ' | Downloaded ' + downloaded + '/' + array.length);
                            resolve();
                        });
                    } catch (err) {
                        skip(err);
                    }
                });
            };

            return Promise.all(array.map(download))
                .then(function () {
                    return zip.generateAsync({type: 'blob'});
                })
                .then(function (content) {
                    saveAs(content, 'documents.zip');
                })
                .catch(function (err) {
                    console.error('Could not create documents.zip', err);
                });
        },

        /**
         * Serializes the items to CSV and triggers a download of `result.csv`.
         * Columns are the union of all item keys in first-seen order; missing
         * values become empty cells. Every cell is quoted and embedded double
         * quotes are doubled. The input is not modified.
         *
         * @param {Array<Object>} array - items to export; nothing is downloaded
         *     when it is empty.
         */
        toCsv: function (array) {
            if (!array || array.length === 0) {
                console.log('Nothing to export to csv');
                return;
            }

            const headers = [];
            array.forEach(function (row) {
                Object.keys(row).forEach(function (key) {
                    if (!headers.includes(key)) {
                        headers.push(key);
                    }
                });
            });

            const lines = [headers.map(csvCell).join(',')];
            array.forEach(function (row) {
                const cells = headers.map(function (key) {
                    return csvCell(key in row ? row[key] : '');
                });
                lines.push(cells.join(','));
            });
            const result = lines.join('\n') + '\n';

            const a = document.createElement('a');
            a.href = 'data:attachment/csv,' + encodeURIComponent(result);
            a.target = '_blank';
            a.download = 'result.csv';
            document.body.appendChild(a);
            a.click();
            // The link is only needed for the click; do not leave it in the page.
            document.body.removeChild(a);
        },

        /**
         * Builds the print URL of the current record's document. The document
         * id is taken from the source text of the first click handler bound to
         * `#DocumentViewButtonAll`: the first `("...")` call argument found in it.
         *
         * NOTE(review): this reads jQuery's internal `$._data` event store and
         * assumes the handler passes the id as a double-quoted literal.
         *
         * @returns {string} absolute URL of the document.
         * @throws {Error} when the button, its click handler or the id is missing.
         */
        getPDFUrl: function () {
            const button = $('#DocumentViewButtonAll')[0];
            const store = button ? $._data(button) : undefined;
            const clickHandlers = store && store.events && store.events.click;
            if (!clickHandlers || clickHandlers.length === 0) {
                throw new Error('No click handler found on #DocumentViewButtonAll');
            }

            const source = clickHandlers[0].handler.toString();
            const bodyMatch = source.match(/function[^{]+\{([\s\S]*)\}$/);
            const functionBody = bodyMatch ? bodyMatch[1] : source;

            // Capture what is between the quotes instead of stripping the first
            // '(' / ')' / '"' characters, which broke ids containing those characters.
            const idMatch = functionBody.match(/\("(.*)"\)/);
            if (!idMatch) {
                throw new Error('Could not find the document id in the #DocumentViewButtonAll handler');
            }
            return PDF_BASE_URL + encodeURIComponent(idMatch[1]);
        },
    };
}());
// Entry point: runs as soon as the script loads and begins waiting for the results table.
- App.start();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement