IAmMoonie

Log Drive Data to Google Sheets.

Oct 10th, 2024
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
JavaScript 8.66 KB | Source Code | 0 0
  1. /**
  2.  * @fileoverview A Google Apps Script to retrieve file names, IDs, and paths from a
  3.  * specified Google Drive folder and populate them into a Google Sheets document.
  4.  * Supports recursive retrieval of files from subfolders if enabled, and uses
  5.  * the Advanced Drive API with pagination for handling large datasets.
  6.  *
  7.  * This script is ideal for automating data collection from Google Drive into
  8.  * Google Sheets, particularly in enterprise environments where scalability and
  9.  * error handling are critical.
  10.  *
  11.  * @version 1.5.1
  12.  * @date 2024-10-11
  13.  * @modifiedBy u/IAmMoonie
  14.  * @see {@link https://developers.google.com/apps-script/advanced/drive} - For using the Advanced Drive API.
  15.  * @see {@link https://developers.google.com/apps-script/reference/properties/properties-service} - For storing progress using PropertiesService.
  16.  * @see {@link https://developers.google.com/apps-script/reference/spreadsheet/spreadsheet-app} - For interacting with Google Sheets.
  17.  * @see {@link https://developers.google.com/apps-script/guides/v8-runtime} - Information on V8 runtime and ES6 compatibility.
  18.  */
  19.  
  20. /**
  21.  * Configuration object for user-defined variables.
  22.  * @constant {Object} CONFIG
  23.  * @property {string} folderId - The ID of the Google Drive folder to retrieve files from.
  24.  * @property {number} rangeStartRow - The starting row number in the Google Sheet where the data should be written.
  25.  * @property {number} fileNameColumn - The column number in the Google Sheet where file names will be written (e.g., 5 for column E).
  26.  * @property {number} fileIdColumn - The column number in the Google Sheet where file IDs will be written (e.g., 9 for column I).
  27.  * @property {number} filePathColumn - The column number in the Google Sheet where file paths will be written.
  28.  * @property {number} batchSize - The number of files to retrieve per API call for efficient pagination.
  29.  * @property {string} propertyKey - The key used to store the last processed page token in the script's properties, enabling script continuity.
  30.  * @property {boolean} recursive - If true, retrieves files from all subfolders recursively.
  31.  * @property {number} chunkSize - The number of rows to write to the sheet at a time to avoid memory issues.
  32.  * @property {number} initialBackoff - The initial delay (in milliseconds) for exponential backoff when handling rate limits.
  33.  */
  34. const CONFIG = {
  35.   folderId: "<Google Drive Folder>",
  36.   rangeStartRow: 2,
  37.   fileNameColumn: 1,
  38.   fileIdColumn: 2,
  39.   filePathColumn: 3,
  40.   batchSize: 100,
  41.   propertyKey: "LAST_PAGE_TOKEN",
  42.   recursive: false,
  43.   chunkSize: 500,
  44.   initialBackoff: 1000
  45. };
  46. let retryCount = 0;
  47.  
  48. /**
  49.  * Retrieves file names, IDs, and paths from a specified Google Drive folder and writes
  50.  * them to the active Google Sheet. Supports recursive exploration of subfolders.
  51.  * Uses the Advanced Drive API for pagination and batch operations for large datasets.
  52.  *
  53.  * @function getFileIds
  54.  * @returns {void} This function does not return a value but writes data directly to the Google Sheet.
  55.  * @throws {Error} Throws an error if an issue occurs while retrieving files or writing data.
  56.  */
  57. function getFileIds() {
  58.   validateConfig_();
  59.   try {
  60.     log_("Starting file retrieval process...");
  61.     const sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  62.     let pageToken = PropertiesService.getScriptProperties().getProperty(
  63.       CONFIG.propertyKey
  64.     );
  65.     const names = [];
  66.     const ids = [];
  67.     const paths = [];
  68.     fetchFilesAndSubfolders_(
  69.       CONFIG.folderId,
  70.       pageToken,
  71.       names,
  72.       ids,
  73.       paths,
  74.       0,
  75.       ""
  76.     );
  77.     if (names.length) {
  78.       const lastRow = sheet.getLastRow() + 1;
  79.       log_(`Writing ${names.length} files to the sheet in chunks.`);
  80.       writeInChunks_(sheet, names, ids, paths, lastRow);
  81.       retryCount = 0;
  82.     } else {
  83.       log_(
  84.         "No files found in the specified folder and its subfolders.",
  85.         "warn"
  86.       );
  87.     }
  88.     PropertiesService.getScriptProperties().deleteProperty(CONFIG.propertyKey);
  89.     PropertiesService.getScriptProperties().deleteProperty(
  90.       "LAST_PROCESSED_FOLDER"
  91.     );
  92.     log_("File retrieval and data writing completed successfully.");
  93.   } catch (error) {
  94.     handleRateLimit_(error);
  95.   }
  96. }
  97.  
  98. /**
  99.  * Recursively fetches files from a folder and its subfolders if recursion is enabled.
  100.  * Uses the Advanced Drive API for paginated results.
  101.  *
  102.  * @param {string} folderId - The ID of the Google Drive folder to retrieve files from.
  103.  * @param {string|null} pageToken - The page token for continuing a previous fetch.
  104.  * @param {Array} names - The array to store file names.
  105.  * @param {Array} ids - The array to store file IDs.
  106.  * @param {Array} paths - The array to store file paths.
  107.  * @param {number} depth - The current depth of recursion.
  108.  * @param {string} currentPath - The path of the current folder.
  109.  * @returns {void}
  110.  * @private
  111.  */
  112. function fetchFilesAndSubfolders_(
  113.   folderId,
  114.   pageToken,
  115.   names,
  116.   ids,
  117.   paths,
  118.   depth,
  119.   currentPath
  120. ) {
  121.   do {
  122.     const response = Drive.Files.list({
  123.       q: `'${folderId}' in parents and trashed = false`,
  124.       fields: "nextPageToken, files(id, name, mimeType)",
  125.       pageSize: CONFIG.batchSize,
  126.       pageToken: pageToken || null
  127.     });
  128.     const files = response.files || [];
  129.     for (const { mimeType, name, id } of files) {
  130.       const fullPath = `${currentPath}/${name}`;
  131.       if (
  132.         mimeType === "application/vnd.google-apps.folder" &&
  133.         CONFIG.recursive
  134.       ) {
  135.         log_(`Exploring subfolder: ${name} (${id}), Depth: ${depth + 1}`);
  136.         fetchFilesAndSubfolders_(
  137.           id,
  138.           null,
  139.           names,
  140.           ids,
  141.           paths,
  142.           depth + 1,
  143.           fullPath
  144.         );
  145.       } else {
  146.         names.push([name]);
  147.         ids.push([id]);
  148.         paths.push([fullPath]);
  149.       }
  150.     }
  151.     pageToken = response.nextPageToken;
  152.     if (pageToken) {
  153.       PropertiesService.getScriptProperties().setProperty(
  154.         CONFIG.propertyKey,
  155.         pageToken
  156.       );
  157.       PropertiesService.getScriptProperties().setProperty(
  158.         "LAST_PROCESSED_FOLDER",
  159.         folderId
  160.       );
  161.     }
  162.   } while (pageToken);
  163. }
  164.  
  165. /**
  166.  * Writes data in chunks to the Google Sheet to avoid memory issues.
  167.  *
  168.  * @param {GoogleAppsScript.Spreadsheet.Sheet} sheet - The Google Sheet to write data to.
  169.  * @param {Array} names - The array of file names.
  170.  * @param {Array} ids - The array of file IDs.
  171.  * @param {Array} paths - The array of file paths.
  172.  * @param {number} startRow - The starting row for writing data.
  173.  * @returns {void}
  174.  * @private
  175.  */
  176. function writeInChunks_(sheet, names, ids, paths, startRow) {
  177.   const { chunkSize, fileNameColumn, fileIdColumn, filePathColumn } = CONFIG;
  178.   for (let i = 0; i < names.length; i += chunkSize) {
  179.     const nameChunk = names.slice(i, i + chunkSize);
  180.     const idChunk = ids.slice(i, i + chunkSize);
  181.     const pathChunk = paths.slice(i, i + chunkSize);
  182.     sheet
  183.       .getRange(startRow, fileNameColumn, nameChunk.length, 1)
  184.       .setValues(nameChunk);
  185.     sheet
  186.       .getRange(startRow, fileIdColumn, idChunk.length, 1)
  187.       .setValues(idChunk);
  188.     sheet
  189.       .getRange(startRow, filePathColumn, pathChunk.length, 1)
  190.       .setValues(pathChunk);
  191.     startRow += nameChunk.length;
  192.     log_(
  193.       `Wrote ${nameChunk.length} rows to the sheet, starting from row ${
  194.         startRow - nameChunk.length
  195.       }.`
  196.     );
  197.   }
  198. }
  199.  
  200. /**
  201.  * Handles API rate limit errors with exponential backoff.
  202.  *
  203.  * @param {Error} error - The error object.
  204.  * @private
  205.  */
  206. function handleRateLimit_({ message }) {
  207.   log_(`Error in getFileIds: ${message}`, "error");
  208.   if (message.includes("Rate Limit Exceeded")) {
  209.     const delay = Math.min(60000, CONFIG.initialBackoff * 2 ** retryCount);
  210.     log_(`Rate limit exceeded, retrying in ${delay / 1000} seconds...`);
  211.     Utilities.sleep(delay);
  212.     retryCount++;
  213.     getFileIds();
  214.   } else {
  215.     throw new Error(
  216.       "An error occurred while retrieving files or writing data. Check logs for details."
  217.     );
  218.   }
  219. }
  220.  
  221. /**
  222.  * Validates the configuration settings.
  223.  * @returns {void}
  224.  * @private
  225.  */
  226. function validateConfig_() {
  227.   if (!CONFIG.folderId)
  228.     throw new Error(
  229.       "CONFIG.folderId is not set. Please specify a valid folder ID."
  230.     );
  231.   if (CONFIG.chunkSize <= 0)
  232.     throw new Error("CONFIG.chunkSize must be greater than 0.");
  233. }
  234.  
  235. /**
  236.  * Logs messages with different levels.
  237.  *
  238.  * @param {string} message - The message to log.
  239.  * @param {"info"|"warn"|"error"} [level="info"] - The level of the log message.
  240.  * @returns {void}
  241.  * @private
  242.  */
  243. function log_(message, level = "info") {
  244.   console[level](message);
  245. }
  246.  
Advertisement
Add Comment
Please, Sign In to add comment