Skelun

Koikatsu

Oct 27th, 2023 (edited)
1,397
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /* -----------------------------------------------
  2. -- MOON RATING
  3. --------------------------------------------------
  4. -- I'm using moon emoji because it was the easiest way
  5. -- to simulate rating stars (including half stars).
  6. -------------------------------------------------- */
  7.  
  8. function moonRating(number) {
  9.  
  10.   // Is it a number?
  11.   if (!isNaN(number)) {
  12.     var votes;
  13.  
  14.     // Is it an interger?
  15.     if (number % 1 === 0) {
  16.       votes="🌕".repeat(number);
  17.       votes+="🌑".repeat(5-number);
  18.    
  19.     // Or is it a float?
  20.     } else {
  21.       let n = Math.floor(number);
  22.       votes="🌕".repeat(n);
  23.       votes+="🌗";
  24.       votes+="🌑".repeat(4-n);
  25.     }
  26.     return votes;
  27.   } else {
  28.     // If no votes
  29.     return "------"
  30.   }
  31. }
  32.  
  33. /* --------------------------------------------------------------------
  34. -- Scraping data using Cheerio library
  35. -- (https://github.com/tani/cheeriogs)
  36. -- Script ID: 1ReeQ6WO8kKNxoaA_O0XEQ589cIrRvEBA9qcWpNqdOP17i47u6N9M5Xh0
  37. ----------------------------------------------------------------------- */
  38.  
  39. // Fetches a page's raw HTML (parsed later with Cheerio)
  40. function getContent(url) {
  41.     return UrlFetchApp.fetch(url).getContentText()
  42. }
  43.  
  44. // Actual scrape function
  45. function scrapeData() {
  46.  
  47.   // Selects the active sheet
  48.   var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  49.  
  50.   // Get selected cells
  51.   var selectedRange = sheet.getActiveRange();
  52.  
  53.   // Get the values from the selected cells
  54.   var selectedValues = selectedRange.getValues();
  55.  
  56.   for (var i = 0; i < selectedValues.length; i++) {
  57.  
  58.     // It assumes the URLs are in the first column of the selected column
  59.     var url = selectedValues[i][0];
  60.  
  61.     // If the value is not empty...
  62.     if (url) {
  63.       // Grabs the HTML data
  64.       var content = getContent(url);
  65.      
  66.       // Parses the HTML data
  67.       var $ = Cheerio.load(content);
  68.  
  69.       /* -----------------------------------------------
  70.       -- [TITLE]
  71.       -------------------------------------------------- */
  72.       var threadTitle = $('h1.p-title-value');
  73.  
  74.       var title = threadTitle.contents().filter(function() {
  75.         return this.type === 'text';
  76.       }).text();
  77.  
  78.       /* -----------------------------------------------
  79.       -- [STATUS] (ACTIVE - ABANDONED - ONHOLD)
  80.       -------------------------------------------------- */
  81.       // Default status
  82.       var status = 'Active';
  83.  
  84.       // Search for "Abandoned" or "Onhold"
  85.       threadTitle.find('span').each(function() {
  86.         var spanText = $(this).text();
  87.         if (spanText === 'abandoned' || spanText === 'Onhold') {
  88.           status = spanText;
  89.           return false;
  90.         }
  91.       });
  92.  
  93.       /* -----------------------------------------------
  94.       -- [ENGINE]
  95.       -------------------------------------------------- */
  96.       // If there's no engine info, it's unknown
  97.       var engine = 'Unknown';
  98.  
  99.       // Checks if there's one of these strings
  100.       threadTitle.find('span').each(function() {
  101.         var spanText = $(this).text();
  102.         if (['ADRIFT','Flash','Java','Others','QSP','RAGS','RPGM',"Ren'Py",'Tads','Unity','Unreal Engine','WebGL','Wolf RPG'].includes(spanText)) {
  103.           engine = spanText;
  104.           return false; // Stop searching once a match is found
  105.         }
  106.       });
  107.  
  108.       /* -----------------------------------------------
  109.       -- [TAGS]
  110.       ----------------------------------------------- */
  111.       // Selects the element containing the tags
  112.       var tagList = $('span.js-tagList');
  113.  
  114.       // Grabs the tags and separate them by comma
  115.       var tags = tagList.find('a.tagItem').map(function() {
  116.         return $(this).text();
  117.       }).get().join(', ');
  118.  
  119.       /* -----------------------------------------------
  120.       -- GAMEPLAY
  121.       -------------------------------------------------- */
  122.       var gameplay;
  123.  
  124.       // Assumes the [Gameplay] type by using the tags
  125.       switch (true) {
  126.         case tags.includes('sandbox'):
  127.           gameplay = 'Sandbox';
  128.           break;
  129.         case tags.includes('turn based combat'):
  130.           gameplay = 'TBC';
  131.           break;
  132.         case tags.includes('management'):
  133.           gameplay = 'Management';
  134.           break;
  135.         case tags.includes('simulator'):
  136.           gameplay = 'Simulator';
  137.           break;
  138.         case tags.includes('rpg'):
  139.           gameplay = 'RPG';
  140.           break;
  141.         default:
  142.           gameplay = 'Visual Novel';
  143.       }
  144.  
  145.       /* -----------------------------------------------
  146.       -- EXTRA INFORMATION
  147.       --------------------------------------------------
  148.       -- - Thread [Updated] Date
  149.       -- - [Release] Date
  150.       -- - [Dev] name
  151.       -- - [Censored]
  152.       -------------------------------------------------- */
  153.       // Selects the first message body (which is the one containing the game info)
  154.       var articleText = $('article.message-body').first().text();
  155.  
  156.       // Uses regular expressions to match and capture the information
  157.       // When it was updated
  158.       var updatedREGEX = /Thread Updated: (\d{4}-\d{2}-\d{2})/.exec(articleText);
  159.       // When it was released
  160.       var releaseREGEX = /Release Date: (\d{4}-\d{2}-\d{2})/.exec(articleText);
  161.       // Who is the dev
  162.       var devREGEX = /Developer: ([^\s]+)/.exec(articleText);
  163.       // It is censored?
  164.       var censoredREGEX = /Censored: (.+?)(?=\n|\r|\r\n)/.exec(articleText);
  165.  
  166.       // Extract the captured values or provide default values if not found
  167.       var updated = updatedREGEX ? updatedREGEX[1] : "Not found";
  168.       var release = releaseREGEX ? releaseREGEX[1] : "Not found";
  169.       var dev = devREGEX ? devREGEX[1] : "Not found";
  170.       var censored = censoredREGEX ? censoredREGEX[1] : "Not found";
  171.  
  172.       /* -----------------------------------------------
  173.       -- [RATING] & [VOTES]
  174.       -------------------------------------------------- */
  175.       // Grabs the JSON generated by F95 forum
  176.       var jsonInfo = $('script[type="application/ld+json"]').last();
  177.      
  178.       // Uses regular expression to match and caputre the information
  179.       var ratingREGEX = /"ratingValue": "(\d+(?:\.\d+)?)"/.exec(jsonInfo);
  180.       var votesREGEX = /"ratingCount": "(\d+)"/.exec(jsonInfo);
  181.  
  182.       // Extract the captured values or provide default values if not found
  183.       var rating = ratingREGEX ? ratingREGEX[1] : "No Rating";
  184.       var votes = votesREGEX ? votesREGEX[1] : "0";
  185.  
  186.       /* -----------------------------------------------
  187.       -- OUTPUTS THE VALUE INTO DIFFERENT CELLS
  188.       -------------------------------------------------- */
  189.  
  190.       // Write the scraped data to the same row
  191.       var outputRow = selectedRange.getRow() + i;
  192.      
  193.       // Write the scraped data to adjacent columns
  194.       var outputColumn = selectedRange.getColumn() + 1;
  195.      
  196.       // All values that will be written
  197.       var values = [[
  198.                       title,
  199.                       status,
  200.                       engine,
  201.                       gameplay,
  202.                       tags,
  203.                       moonRating(rating),
  204.                       votes,
  205.                       updated,
  206.                       release,
  207.                       dev,
  208.                       censored
  209.                     ]];
  210.  
  211.       // Where it will be written
  212.       sheet.getRange(outputRow, outputColumn, 1, values[0].length).setValues(values);
  213.     }
  214.   }
  215.  
  216. }
  217.  
  218. /* -----------------------------------------------
  219. -- CREATES AN EXTRA MENU OPTION
  220. -------------------------------------------------- */
  221.  
  222. function onOpen() {
  223.   var ui = SpreadsheetApp.getUi();
  224.   ui.createMenu('Scrape Data')
  225.       .addItem('Scrape Data', 'scrapeData')
  226.       .addToUi();
  227. }
Advertisement
Add Comment
Please, Sign In to add comment