Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Script for downloading all of the music from coda's blog
 * at: https://coda.s3m.us/category/ohc/page/137/
 */
var fs = require("fs");

var jsdom = require("jsdom");
var request = require("request");
/**
 * Requests one page of the blog's infinite-scroll endpoint and passes the
 * decoded JSON response to `cb`.
 *
 * Failures (transport error or a body that is not valid JSON) are logged and
 * `cb` is NOT invoked, so the caller's chain simply stops at that page.
 *
 * @param {number} page - page number sent as the `page` form field.
 * @param {function(Object)} cb - called with the parsed JSON body on success.
 */
function getPage(page, cb) {
    // Form fields posted to the endpoint; only `page` varies between calls.
    var postData = {
        action: "infinite_scroll",
        page: page,
        currentday: "07.08.06",
        order: "DESC",
        "query_args[paged]": 2,
        "query_args[category_name]": "ohc",
        "query_args[cat]": 10229527,
        "query_args[nopaging]": false,
        "query_args[no_found_rows]": false,
        "query_args[comments_per_page]": 0,
        "query_args[order]": "DESC"
    };
    var options = {
        url: 'https://coda.s3m.us/?infinity=scrolling',
        method: 'POST',
        form: postData
    };
    request(options, function(err, res, body) {
        if (err) {
            console.dir(err);
            return;
        }
        // An error page or empty body is not JSON; parsing it must not crash
        // the whole crawl with an uncaught exception.
        var parsed;
        try {
            parsed = JSON.parse(body);
        }
        catch (parseErr) {
            console.log("page " + page + ": response was not valid JSON (status " +
                (res && res.statusCode) + ")");
            console.dir(parseErr);
            return;
        }
        // Invoked outside the try block so that exceptions thrown by the
        // caller's callback are not misreported as parse failures.
        cb(parsed);
        // Example mp3 URLs seen on the blog:
        //   https://codatrigger.files.wordpress.com/2015/08/eggshoes.mp3
        //   https://codatrigger.files.wordpress.com/2009/02/untripped.mp3
        // and URLs generated from post addresses look like:
        //   https://codatrigger.files.wordpress.com/2016/02/meca.mp3 <--- good
        //   https://codatrigger.files.wordpress.com/2006/08/so-so-damn-tired.mp3 <-- bad?
    });
}
/**
 * Loads the pages in `listOfUrls` one after another, starting at `index`,
 * and appends one entry per loaded page to `data`: the href of the last
 * anchor whose href contains ".mp3", or the string "null" when the page has
 * no such anchor or could not be loaded.
 *
 * @param {number} index - position in `listOfUrls` to process next.
 * @param {string[]} listOfUrls - page URLs to scan.
 * @param {string[]} data - accumulator; mutated in place and passed to `doneCb`.
 * @param {function(string[])} doneCb - called with `data` once the list is exhausted.
 */
function processUrl(index, listOfUrls, data, doneCb) {
    // Past the end of the list (also covers an empty list): hand back the results.
    if (index >= listOfUrls.length)
        return doneCb(data);
    // A missing or empty entry cannot be fetched; skip it rather than
    // abandoning the remaining URLs.
    if (!listOfUrls[index])
        return processUrl(index + 1, listOfUrls, data, doneCb);
    // No helper scripts are injected: only plain DOM calls are used below.
    jsdom.env(
        listOfUrls[index],
        function(err, window) {
            var realUrl = "null";
            if (err || !window) {
                // Keep going with the "null" placeholder so one bad page
                // does not abort the whole run.
                console.log("failed to load " + listOfUrls[index] + ":");
                console.dir(err);
            }
            else {
                Array.prototype.forEach.call(
                    window.document.getElementsByTagName("a"),
                    function(anchor) {
                        if (anchor.href.indexOf(".mp3") > -1)
                            realUrl = anchor.href;
                    });
                // Release the window; otherwise every visited page stays in memory.
                window.close();
            }
            console.log("found realUrl: " + realUrl);
            data.push(realUrl);
            processUrl(index + 1, listOfUrls, data, doneCb);
        });
}
/**
 * Rewrites a collected URL into the form written to all_urls.txt:
 * swaps the blog domain for the file host, drops a day segment that follows
 * a yyyy/mm/ path, strips one trailing slash, and makes sure the result
 * carries an ".mp3" extension.
 *
 * @param {string} url - an entry collected by the crawl.
 * @returns {string} the rewritten URL.
 */
function toMp3Url(url) {
    var converted = url
        .replace(/coda\.s3m\.us/gi, "codatrigger.files.wordpress.com") // convert domain
        .replace(/([0-9]{4}\/[0-9]{2}\/)([0-9]{2}\/)/, "$1") // remove day from URL
        .replace(/\/$/, ""); // remove trailing /
    // Entries that already point at an mp3 must not end up as "….mp3.mp3".
    if (/\.mp3([?#].*)?$/i.test(converted))
        return converted;
    return converted + ".mp3";
}

/**
 * Writes the collected entries to raw_out.txt (verbatim) and all_urls.txt
 * (rewritten by toMp3Url), one per line, and logs "truly done." once both
 * files have been flushed.
 *
 * @param {string[]} data - collected entries, in crawl order.
 */
function writeUrlFiles(data) {
    var stream_mp3s = fs.createWriteStream("all_urls.txt");
    var raw_out = fs.createWriteStream("raw_out.txt");
    var pending = 2;

    function onFinished() {
        pending -= 1;
        if (pending === 0)
            console.log("truly done.");
    }
    function onError(err) {
        console.log("failed to write URL dump:");
        console.dir(err);
    }

    stream_mp3s.on("error", onError);
    raw_out.on("error", onError);
    // Both files get exactly one line per entry, so line N of one file
    // corresponds to line N of the other.
    for (var i = 0; i < data.length; i++) {
        stream_mp3s.write(toMp3Url(data[i]) + "\n");
        raw_out.write(data[i] + "\n");
    }
    stream_mp3s.end(onFinished);
    raw_out.end(onFinished);
}

/**
 * Crawls pages from `page` down to 0. For each page the keys of the
 * response's `postflair` object are treated as page URLs and scanned by
 * processUrl; after page 0 the accumulated entries are dumped to disk.
 *
 * @param {number} page - page number to request now.
 * @param {string[]} data - entries accumulated so far.
 */
function tick(page, data) {
    console.log("Requesting page " + page + "....");
    getPage(page, function(body) {
        // Tolerate a response without a postflair object (nothing to scan).
        var postflair = (body && body.postflair) || {};
        var searchUrls = Object.keys(postflair);
        // process each of those urls, searching for the mp3 download URL, then finally
        // continue to the next page
        processUrl(0, searchUrls, data, function(collected) {
            if (page > 0) {
                tick(page - 1, collected);
                return;
            }
            console.log("Data: ");
            console.log(collected);
            console.log("dumping URLs to file...");
            writeUrlFiles(collected);
        });
    });
}
// Page number the crawl starts from; tick() works downwards from here to page 0.
var START_PAGE = 140;
tick(START_PAGE, []);
// tick() only kicks off asynchronous requests, so no page has been processed
// yet when this line runs; completion is reported by tick() itself.
console.log("crawl started.");
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement