Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* --------------------------------------------------------------------
- -- Transforms embedded links into actual Hyperlinks
- ----------------------------------------------------------------------- */
- function createHyperlink() {
- var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
- // Get the selected range
- var range = sheet.getActiveRange();
- var richTextValues = range.getRichTextValues();
- for (var i = 0; i < richTextValues.length; i++) {
- for (var j = 0; j < richTextValues[0].length; j++) {
- var textValue = richTextValues[i][j].getText();
- var link = richTextValues[i][j].getLinkUrl();
- if (link) {
- // Creates a hyperlink formula
- var formula = '=HYPERLINK("' + link + '", "Here")';
- // Set the formula in the cell
- sheet.getRange(range.getRow() + i, range.getColumn() + j).setFormula(formula);
- }
- }
- }
- }
- /* --------------------------------------------------------------------
- -- MOON RATING
- -----------------------------------------------------------------------
- -- I'm using this because it was the easier way
- -- to simulate rating stars.
- ----------------------------------------------------------------------- */
- function moonRating(number) {
- // Is it a number?
- if (!isNaN(number)) {
- var votes;
- // Is it an interger?
- if (number % 1 === 0) {
- votes="🌕".repeat(number);
- votes+="🌑".repeat(5-number);
- // Or is it a float?
- } else {
- let n = Math.floor(number);
- votes="🌕".repeat(n);
- votes+="🌗";
- votes+="🌑".repeat(4-n);
- }
- return votes;
- } else {
- // If no votes
- return "------"
- }
- }
- /* --------------------------------------------------------------------
- -- Scraping data using Cheerio library
- -- (https://github.com/tani/cheeriogs)
- -- Script ID: 1ReeQ6WO8kKNxoaA_O0XEQ589cIrRvEBA9qcWpNqdOP17i47u6N9M5Xh0
- ----------------------------------------------------------------------- */
- // Cheerio method to scrape
- function getContent(url) {
- return UrlFetchApp.fetch(url).getContentText()
- }
- /* --------------------------------------------------------------------
- -- Actual scrape function
- ----------------------------------------------------------------------- */
- function scrapeData(force) {
- // Selects the active sheet
- var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
- // Get selected cells
- var selectedRange = sheet.getActiveRange();
- // Get the values from the selected cells
- var selectedValues = selectedRange.getRichTextValues();
- // Iterate through cells
- for (var i = 0; i < selectedValues.length; i++) {
- // It assumes the URLs are in the first column of the selected column
- var urlFromLink = selectedValues[i][0].getLinkUrl();
- var urlFromCellValue = selectedValues[i][0].getText(); // Get text value from the cell
- // Check if there is a link URL, and use it if available; otherwise, use the URL from the cell value
- var url = urlFromLink ? urlFromLink : urlFromCellValue;
- // If the value is not empty...
- if (url && isValidURL(url)) {
- // Grabs the HTML data
- var content = getContent(url);
- // Parses the HTML data
- var $ = Cheerio.load(content);
- /* --------------------------------------------------------------------
- -- [TITLE]
- -------------------------------------------------------------------- */
- var threadTitle = $('h1.p-title-value');
- var rawTitle = threadTitle.contents().filter(function() {
- return this.type === 'text';
- }).text();
- var titleREGEX = /(.+?)\s*\[(.+?)\](?:\s*\[(.+?)\])?/.exec(rawTitle);
- // We should check if titleREGEX is not null before accessing its indices
- if (titleREGEX) {
- var title = titleREGEX[1];
- var version = titleREGEX[2];
- } else {
- var title = "Title Problem ⚠️";
- var version = "Version Problem ⚠️";
- }
- var devBackup = titleREGEX[3] ? titleREGEX[3] + " ❓" : "Not found ⚠️";
- /* --------------------------------------------------------------------
- -- [STATUS] (ACTIVE - ABANDONED - ONHOLD)
- -------------------------------------------------------------------- */
- // Default status
- var status = 'Active';
- // Search for "Abandoned" or "Onhold"
- threadTitle.find('span').each(function() {
- var spanText = $(this).text();
- if (['Completed','Abandoned','Onhold'].includes(spanText)) {
- status = spanText;
- return false;
- }
- });
- /* --------------------------------------------------------------------
- -- [ENGINE]
- ----------------------------------------------------------------------- */
- // If there's no engine info, it returns "unknown"
- var engine = '⚠️ Unknown';
- // Checks if there's one of these strings
- threadTitle.find('span').each(function() {
- var spanText = $(this).text();
- if (['ADRIFT','Flash','Java','Others','QSP','RAGS','RPGM',"Ren'Py",'Tads','Unity','Unreal Engine','WebGL','Wolf RPG','HTML'].includes(spanText)) {
- engine = spanText;
- return false; // Stop searching once a match is found
- }
- });
- /* --------------------------------------------------------------------
- -- [TAGS]
- ----------------------------------------------------------------------- */
- // Selects the element containing the tags
- var tagList = $('span.js-tagList');
- // Grabs the tags and separate them by comma
- var tags = tagList.find('a.tagItem').map(function() {
- return $(this).text();
- }).get().join(', ');
- /* --------------------------------------------------------------------
- -- GAMEPLAY
- ----------------------------------------------------------------------- */
- var gameplay;
- // Assumes the [Gameplay] type by using the tags
- switch (true) {
- case tags.includes('sandbox'):
- gameplay = 'Sandbox';
- break;
- case tags.includes('turn based combat'):
- gameplay = 'TBC';
- break;
- case tags.includes('management'):
- gameplay = 'Management';
- break;
- case tags.includes('simulator'):
- gameplay = 'Simulator';
- break;
- case tags.includes('rpg'):
- gameplay = 'RPG';
- break;
- default:
- gameplay = 'Visual Novel';
- }
- /* --------------------------------------------------------------------
- -- - Thread [Start] Date
- ----------------------------------------------------------------------- */
- // Selects the first 'article' element
- var firstArticle = $('article').first();
- // Find the 'header.message-attribution' element
- var messageAttributionHeader = firstArticle.find('header.message-attribution');
- // Find the 'time' element with class 'u-dt'
- var timeElement = messageAttributionHeader.find('time.u-dt');
- // Check if the time element exists
- if (timeElement.length > 0) {
- // Extract the 'datetime' attribute
- var start = timeElement.attr('datetime');
- start = start.split('T')[0]; // Get the date part only
- } else {
- Logger.log("<time> element not found.");
- }
- /* --------------------------------------------------------------------
- -- EXTRA INFORMATION
- -----------------------------------------------------------------------
- -- - Thread [Updated] Date
- -- - [Release] Date
- -- - [Dev] name
- -- - [Censored]
- ----------------------------------------------------------------------- */
- // Selects the first message body (which is the one containing the game info)
- var articleText = $('article.message-body').first().text();
- // Uses regular expressions to match and capture the information
- // When it was updated
- var updatedREGEX = /Thread Updated: ([^\s]+)/.exec(articleText);
- // When it was released
- var releaseREGEX = /Release Date: ([^\s]+)/.exec(articleText);
- // Who is the dev
- var devREGEX = /Developer: ([^\s]+)/.exec(articleText);
- // It is censored?
- var censoredREGEX = /Censored: (.+?)(?=\n|\r|\r\n)/.exec(articleText);
- // Extract the captured values or provide default values if not found
- var updated = updatedREGEX ? updatedREGEX[1] : "Not found ⚠️";
- var release = releaseREGEX ? releaseREGEX[1] : "Not found ⚠️";
- //Converts to string due to Google Sheets altering the type
- updated = updated.toString()
- release = release.toString()
- var dev = devREGEX ? devREGEX[1] : devBackup;
- var censored = censoredREGEX ? censoredREGEX[1] : "Not found ⚠️";
- /* --------------------------------------------------------------------
- -- [RATING] & [VOTES]
- ----------------------------------------------------------------------- */
- // Grabs the JSON generated by F95 forum
- var jsonInfo = $('script[type="application/ld+json"]').last();
- // Uses regular expression to match and caputre the information
- var ratingREGEX = /"ratingValue": "(\d+(?:\.\d+)?)"/.exec(jsonInfo);
- var votesREGEX = /"ratingCount": "(\d+)"/.exec(jsonInfo);
- // Extract the captured values or provide default values if not found
- var rating = ratingREGEX ? ratingREGEX[1] : "No rating yet";
- var votes = votesREGEX ? votesREGEX[1] : "0";
- /* --------------------------------------------------------------------
- -- LAST UPDATE CHECK
- -- --------------------------------------------------------------------
- -- It will ignore the game if it was updated less than 30 days ago
- ----------------------------------------------------------------------- */
- // Gets today's date
- var today = new Date();
- // Parses the update & release info
- var updatedDate = new Date(updated);
- var releaseDate = new Date(release);
- // Subtracts both dates to get days gap
- var daysSinceLastUpdate = Math.floor((today - updatedDate) / (1000 * 60 * 60 * 24));
- var daysSinceRelease = Math.floor((today - releaseDate) / (1000 * 60 * 60 * 24));
- if (force === true || daysSinceRelease < 30 || daysSinceLastUpdate > 30) {
- /* --------------------------------------------------------------------
- -- OUTPUTS THE VALUE INTO DIFFERENT CELLS
- ----------------------------------------------------------------------- */
- // Write the scraped data to the same row
- var outputRow = selectedRange.getRow() + i;
- // Write the scraped data to adjacent columns
- var outputColumn = selectedRange.getColumn() + 1;
- // All values that will be written
- var newValues = [
- rawTitle,
- title,
- version,
- status,
- engine,
- gameplay,
- tags,
- moonRating(rating),
- votes,
- updated,
- release,
- start,
- dev,
- censored
- ];
- // Read existing values in the row
- var rangeToWrite = sheet.getRange(outputRow, outputColumn, 1, newValues.length);
- var existingValues = rangeToWrite.getValues()[0];
- // Compare existing values with new values
- for (var j = 0; j < existingValues.length; j++) {
- // If the new value is empty, keep the existing value
- if (newValues[j] === "" || newValues[j] == null) {
- newValues[j] = existingValues[j];
- }
- }
- // Set only the new non-empty values
- rangeToWrite.setValues([newValues]);
- }
- }
- }
- }
- /* --------------------------------------------------------------------
- -- IGNORE DATE RESTRICTION
- ----------------------------------------------------------------------- */
- function forceScrapeData() {
- scrapeData(true);
- }
- /* --------------------------------------------------------------------
- -- CHECK IF THE STRING IS A VALID URL
- ----------------------------------------------------------------------- */
- function isValidURL(str) {
- var pattern = new RegExp('^(https?:\\/\\/)?'+ // protocol
- '((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|' + // domain name
- '((\\d{1,3}\\.){3}\\d{1,3}))' + // OR an IP Address
- '(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*' + // port and path
- '(\\?[;&a-z\\d%_.~+=-]*)?' + // query string
- '(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
- return pattern.test(str);
- }
- /* --------------------------------------------------------------------
- -- CREATES AN EXTRA MENU OPTION
- ----------------------------------------------------------------------- */
- function onOpen() {
- var ui = SpreadsheetApp.getUi();
- ui.createMenu('Extra Functions')
- .addItem('Scrape Data', 'scrapeData')
- .addItem('Force Scrape', 'forceScrapeData')
- .addItem('Create Hyperlink','createHyperlink')
- .addToUi();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement