Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/* -----------------------------------------------
-- MOON RATING
--------------------------------------------------
-- Moon-phase emoji are used because they are the
-- easiest way to simulate rating stars (including
-- a half star) in a plain-text cell.
-------------------------------------------------- */
/**
 * Renders a rating on a 0-5 scale as a row of five moon emoji.
 *
 * Whole ratings become full moons padded with new moons; fractional
 * ratings get a single half moon after the full ones.
 *
 * @param {number|string} number - Rating value. Numeric strings such as
 *   "4.5" are accepted and converted with Number().
 * @returns {string} Five moon emoji, or "------" when the input is
 *   missing or not a finite number.
 */
function moonRating(number) {
  const MAX_MOONS = 5;
  const NO_RATING = "------";

  // null and blank strings convert to 0 under Number(), which would be
  // rendered as a real "zero moons" rating; treat them as "no rating".
  const isBlank =
    number === null ||
    number === undefined ||
    (typeof number === 'string' && number.trim() === '');
  if (isBlank) {
    return NO_RATING;
  }

  const value = Number(number);
  // Rejects NaN (e.g. the "No Rating" placeholder) and +/-Infinity.
  if (!Number.isFinite(value)) {
    return NO_RATING;
  }

  // Clamp so String.prototype.repeat never receives a negative count,
  // which would throw a RangeError for ratings outside 0-5.
  const rating = Math.min(Math.max(value, 0), MAX_MOONS);
  const fullMoons = Math.floor(rating);

  // Is it an integer?
  if (rating === fullMoons) {
    return "🌕".repeat(fullMoons) + "🌑".repeat(MAX_MOONS - fullMoons);
  }

  // Otherwise it is a float: one slot is taken by the half moon.
  return "🌕".repeat(fullMoons) + "🌗" + "🌑".repeat(MAX_MOONS - 1 - fullMoons);
}
/* --------------------------------------------------------------------
-- Scraping data using Cheerio library
-- (https://github.com/tani/cheeriogs)
-- Script ID: 1ReeQ6WO8kKNxoaA_O0XEQ589cIrRvEBA9qcWpNqdOP17i47u6N9M5Xh0
----------------------------------------------------------------------- */
/**
 * Downloads a page and returns its body as text.
 *
 * The download itself goes through Apps Script's UrlFetchApp; Cheerio is
 * only used afterwards, by the caller, to parse the returned HTML.
 *
 * @param {string} url - Address of the page to download.
 * @returns {string} The response body as returned by getContentText().
 */
function getContent(url) {
  return UrlFetchApp.fetch(url).getContentText();
}
// Actual scrape function
/**
 * Scrapes thread information for every URL in the selected cells and
 * writes the results into the columns to the right of the selection.
 *
 * For each non-empty value in the first column of the active range the
 * page is downloaded with getContent(), parsed with Cheerio, and one row
 * of eleven values is written starting one column right of the selection:
 * title, status, engine, gameplay, tags, moon rating, votes, updated,
 * release, dev, censored.
 *
 * NOTE(review): the selectors and text patterns presumably target the
 * F95 forum thread layout mentioned below — confirm against a live page.
 */
function scrapeData() {
  // Engine labels recognised among the title's <span> labels
  const ENGINES = ['ADRIFT', 'Flash', 'Java', 'Others', 'QSP', 'RAGS', 'RPGM', "Ren'Py", 'Tads', 'Unity', 'Unreal Engine', 'WebGL', 'Wolf RPG'];
  // Lower-cased status label -> value written to the sheet
  const STATUS_LABELS = new Map([
    ['abandoned', 'Abandoned'],
    ['onhold', 'Onhold'],
  ]);
  // Tag fragment -> gameplay type, in priority order (first hit wins)
  const GAMEPLAY_BY_TAG = [
    ['sandbox', 'Sandbox'],
    ['turn based combat', 'TBC'],
    ['management', 'Management'],
    ['simulator', 'Simulator'],
    ['rpg', 'RPG'],
  ];

  // Selects the active sheet
  const sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  // Get selected cells
  const selectedRange = sheet.getActiveRange();
  // Get the values from the selected cells
  const selectedValues = selectedRange.getValues();
  // Results go on the same rows as the URLs, in the adjacent columns
  const firstRow = selectedRange.getRow();
  const outputColumn = selectedRange.getColumn() + 1;

  for (let i = 0; i < selectedValues.length; i++) {
    // It assumes the URLs are in the first column of the selection
    const url = selectedValues[i][0];
    // Skip empty cells
    if (!url) {
      continue;
    }

    // Grabs the HTML data
    const content = getContent(url);
    // Parses the HTML data
    const $ = Cheerio.load(content);

    /* -----------------------------------------------
    -- [TITLE]
    -------------------------------------------------- */
    const threadTitle = $('h1.p-title-value');
    // Keep only the heading's direct text nodes, dropping the label spans.
    // A regular function is required: Cheerio binds `this` to the node.
    const title = threadTitle.contents().filter(function () {
      return this.type === 'text';
    }).text();

    // Text of every <span> inside the title, in document order
    const labels = threadTitle.find('span').map(function () {
      return $(this).text();
    }).get();

    /* -----------------------------------------------
    -- [STATUS] (ACTIVE - ABANDONED - ONHOLD)
    -------------------------------------------------- */
    // Default status
    let status = 'Active';
    // Search for "Abandoned" or "Onhold". Compared case-insensitively so
    // the capitalised label is matched, and written in canonical form.
    for (const label of labels) {
      const canonical = STATUS_LABELS.get(label.trim().toLowerCase());
      if (canonical !== undefined) {
        status = canonical;
        break;
      }
    }

    /* -----------------------------------------------
    -- [ENGINE]
    -------------------------------------------------- */
    // First label that names a known engine; otherwise it's unknown
    const engineLabel = labels.find((label) => ENGINES.includes(label));
    const engine = engineLabel !== undefined ? engineLabel : 'Unknown';

    /* -----------------------------------------------
    -- [TAGS]
    ----------------------------------------------- */
    // Selects the element containing the tags
    const tagList = $('span.js-tagList');
    // Grabs the tags and separates them by comma
    const tags = tagList.find('a.tagItem').map(function () {
      return $(this).text();
    }).get().join(', ');

    /* -----------------------------------------------
    -- GAMEPLAY
    -------------------------------------------------- */
    // Assumes the [Gameplay] type by using the tags. This is a substring
    // test on the joined tag string, not an exact per-tag comparison.
    const gameplayMatch = GAMEPLAY_BY_TAG.find(([fragment]) => tags.includes(fragment));
    const gameplay = gameplayMatch ? gameplayMatch[1] : 'Visual Novel';

    /* -----------------------------------------------
    -- EXTRA INFORMATION
    --------------------------------------------------
    -- - Thread [Updated] Date
    -- - [Release] Date
    -- - [Dev] name
    -- - [Censored]
    -------------------------------------------------- */
    // Selects the first message body (which is the one containing the game info)
    const articleText = $('article.message-body').first().text();
    // Uses regular expressions to match and capture the information
    // When it was updated
    const updatedREGEX = /Thread Updated: (\d{4}-\d{2}-\d{2})/.exec(articleText);
    // When it was released
    const releaseREGEX = /Release Date: (\d{4}-\d{2}-\d{2})/.exec(articleText);
    // Who is the dev (captures up to the first whitespace only)
    const devREGEX = /Developer: ([^\s]+)/.exec(articleText);
    // Is it censored? (captures the rest of the line)
    const censoredREGEX = /Censored: (.+?)(?=\n|\r|\r\n)/.exec(articleText);
    // Extract the captured values or provide default values if not found
    const updated = updatedREGEX ? updatedREGEX[1] : "Not found";
    const release = releaseREGEX ? releaseREGEX[1] : "Not found";
    const dev = devREGEX ? devREGEX[1] : "Not found";
    const censored = censoredREGEX ? censoredREGEX[1] : "Not found";

    /* -----------------------------------------------
    -- [RATING] & [VOTES]
    -------------------------------------------------- */
    // Grabs the JSON generated by F95 forum (last JSON-LD script on the page)
    const jsonInfo = $('script[type="application/ld+json"]').last();
    // Pass the script's contents explicitly rather than relying on exec()
    // stringifying the Cheerio object; html() yields null when nothing matched.
    const jsonText = jsonInfo.html() || '';
    // Uses regular expressions to match and capture the information
    const ratingREGEX = /"ratingValue": "(\d+(?:\.\d+)?)"/.exec(jsonText);
    const votesREGEX = /"ratingCount": "(\d+)"/.exec(jsonText);
    // Extract the captured values or provide default values if not found
    const rating = ratingREGEX ? ratingREGEX[1] : "No Rating";
    const votes = votesREGEX ? votesREGEX[1] : "0";

    /* -----------------------------------------------
    -- OUTPUTS THE VALUE INTO DIFFERENT CELLS
    -------------------------------------------------- */
    // All values that will be written, as a single-row 2D array
    const values = [[
      title,
      status,
      engine,
      gameplay,
      tags,
      moonRating(rating),
      votes,
      updated,
      release,
      dev,
      censored
    ]];
    // Write the scraped data to the same row, in the adjacent columns
    sheet.getRange(firstRow + i, outputColumn, 1, values[0].length).setValues(values);
  }
}
/* -----------------------------------------------
-- CREATES AN EXTRA MENU OPTION
-------------------------------------------------- */
/**
 * Adds a "Scrape Data" menu to the spreadsheet UI.
 *
 * The menu holds a single "Scrape Data" item that is wired to the
 * function named 'scrapeData'.
 */
function onOpen() {
  const ui = SpreadsheetApp.getUi();
  ui.createMenu('Scrape Data')
    .addItem('Scrape Data', 'scrapeData')
    .addToUi();
}
Advertisement
Add Comment
Please, Sign In to add comment