Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * @fileoverview A Google Apps Script to retrieve file names, IDs, and paths from a
- * specified Google Drive folder and populate them into a Google Sheets document.
- * Supports recursive retrieval of files from subfolders if enabled, and uses
- * the Advanced Drive API with pagination for handling large datasets.
- *
- * This script is ideal for automating data collection from Google Drive into
- * Google Sheets, particularly in enterprise environments where scalability and
- * error handling are critical.
- *
- * @version 1.5.1
- * @date 2024-10-11
- * @modifiedBy u/IAmMoonie
- * @see {@link https://developers.google.com/apps-script/advanced/drive} - For using the Advanced Drive API.
- * @see {@link https://developers.google.com/apps-script/reference/properties/properties-service} - For storing progress using PropertiesService.
- * @see {@link https://developers.google.com/apps-script/reference/spreadsheet/spreadsheet-app} - For interacting with Google Sheets.
- * @see {@link https://developers.google.com/apps-script/guides/v8-runtime} - Information on V8 runtime and ES6 compatibility.
- */
- /**
- * Configuration object for user-defined variables.
- * @constant {Object} CONFIG
- * @property {string} folderId - The ID of the Google Drive folder to retrieve files from.
- * @property {number} rangeStartRow - The starting row number in the Google Sheet where the data should be written.
- * @property {number} fileNameColumn - The column number in the Google Sheet where file names will be written (e.g., 5 for column E).
- * @property {number} fileIdColumn - The column number in the Google Sheet where file IDs will be written (e.g., 9 for column I).
- * @property {number} filePathColumn - The column number in the Google Sheet where file paths will be written.
- * @property {number} batchSize - The number of files to retrieve per API call for efficient pagination.
- * @property {string} propertyKey - The key used to store the last processed page token in the script's properties, enabling script continuity.
- * @property {boolean} recursive - If true, retrieves files from all subfolders recursively.
- * @property {number} chunkSize - The number of rows to write to the sheet at a time to avoid memory issues.
- * @property {number} initialBackoff - The initial delay (in milliseconds) for exponential backoff when handling rate limits.
- */
- const CONFIG = {
- folderId: "<Google Drive Folder>",
- rangeStartRow: 2,
- fileNameColumn: 1,
- fileIdColumn: 2,
- filePathColumn: 3,
- batchSize: 100,
- propertyKey: "LAST_PAGE_TOKEN",
- recursive: false,
- chunkSize: 500,
- initialBackoff: 1000
- };
- let retryCount = 0;
- /**
- * Retrieves file names, IDs, and paths from a specified Google Drive folder and writes
- * them to the active Google Sheet. Supports recursive exploration of subfolders.
- * Uses the Advanced Drive API for pagination and batch operations for large datasets.
- *
- * @function getFileIds
- * @returns {void} This function does not return a value but writes data directly to the Google Sheet.
- * @throws {Error} Throws an error if an issue occurs while retrieving files or writing data.
- */
- function getFileIds() {
- validateConfig_();
- try {
- log_("Starting file retrieval process...");
- const sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
- let pageToken = PropertiesService.getScriptProperties().getProperty(
- CONFIG.propertyKey
- );
- const names = [];
- const ids = [];
- const paths = [];
- fetchFilesAndSubfolders_(
- CONFIG.folderId,
- pageToken,
- names,
- ids,
- paths,
- 0,
- ""
- );
- if (names.length) {
- const lastRow = sheet.getLastRow() + 1;
- log_(`Writing ${names.length} files to the sheet in chunks.`);
- writeInChunks_(sheet, names, ids, paths, lastRow);
- retryCount = 0;
- } else {
- log_(
- "No files found in the specified folder and its subfolders.",
- "warn"
- );
- }
- PropertiesService.getScriptProperties().deleteProperty(CONFIG.propertyKey);
- PropertiesService.getScriptProperties().deleteProperty(
- "LAST_PROCESSED_FOLDER"
- );
- log_("File retrieval and data writing completed successfully.");
- } catch (error) {
- handleRateLimit_(error);
- }
- }
- /**
- * Recursively fetches files from a folder and its subfolders if recursion is enabled.
- * Uses the Advanced Drive API for paginated results.
- *
- * @param {string} folderId - The ID of the Google Drive folder to retrieve files from.
- * @param {string|null} pageToken - The page token for continuing a previous fetch.
- * @param {Array} names - The array to store file names.
- * @param {Array} ids - The array to store file IDs.
- * @param {Array} paths - The array to store file paths.
- * @param {number} depth - The current depth of recursion.
- * @param {string} currentPath - The path of the current folder.
- * @returns {void}
- * @private
- */
- function fetchFilesAndSubfolders_(
- folderId,
- pageToken,
- names,
- ids,
- paths,
- depth,
- currentPath
- ) {
- do {
- const response = Drive.Files.list({
- q: `'${folderId}' in parents and trashed = false`,
- fields: "nextPageToken, files(id, name, mimeType)",
- pageSize: CONFIG.batchSize,
- pageToken: pageToken || null
- });
- const files = response.files || [];
- for (const { mimeType, name, id } of files) {
- const fullPath = `${currentPath}/${name}`;
- if (
- mimeType === "application/vnd.google-apps.folder" &&
- CONFIG.recursive
- ) {
- log_(`Exploring subfolder: ${name} (${id}), Depth: ${depth + 1}`);
- fetchFilesAndSubfolders_(
- id,
- null,
- names,
- ids,
- paths,
- depth + 1,
- fullPath
- );
- } else {
- names.push([name]);
- ids.push([id]);
- paths.push([fullPath]);
- }
- }
- pageToken = response.nextPageToken;
- if (pageToken) {
- PropertiesService.getScriptProperties().setProperty(
- CONFIG.propertyKey,
- pageToken
- );
- PropertiesService.getScriptProperties().setProperty(
- "LAST_PROCESSED_FOLDER",
- folderId
- );
- }
- } while (pageToken);
- }
- /**
- * Writes data in chunks to the Google Sheet to avoid memory issues.
- *
- * @param {GoogleAppsScript.Spreadsheet.Sheet} sheet - The Google Sheet to write data to.
- * @param {Array} names - The array of file names.
- * @param {Array} ids - The array of file IDs.
- * @param {Array} paths - The array of file paths.
- * @param {number} startRow - The starting row for writing data.
- * @returns {void}
- * @private
- */
- function writeInChunks_(sheet, names, ids, paths, startRow) {
- const { chunkSize, fileNameColumn, fileIdColumn, filePathColumn } = CONFIG;
- for (let i = 0; i < names.length; i += chunkSize) {
- const nameChunk = names.slice(i, i + chunkSize);
- const idChunk = ids.slice(i, i + chunkSize);
- const pathChunk = paths.slice(i, i + chunkSize);
- sheet
- .getRange(startRow, fileNameColumn, nameChunk.length, 1)
- .setValues(nameChunk);
- sheet
- .getRange(startRow, fileIdColumn, idChunk.length, 1)
- .setValues(idChunk);
- sheet
- .getRange(startRow, filePathColumn, pathChunk.length, 1)
- .setValues(pathChunk);
- startRow += nameChunk.length;
- log_(
- `Wrote ${nameChunk.length} rows to the sheet, starting from row ${
- startRow - nameChunk.length
- }.`
- );
- }
- }
- /**
- * Handles API rate limit errors with exponential backoff.
- *
- * @param {Error} error - The error object.
- * @private
- */
- function handleRateLimit_({ message }) {
- log_(`Error in getFileIds: ${message}`, "error");
- if (message.includes("Rate Limit Exceeded")) {
- const delay = Math.min(60000, CONFIG.initialBackoff * 2 ** retryCount);
- log_(`Rate limit exceeded, retrying in ${delay / 1000} seconds...`);
- Utilities.sleep(delay);
- retryCount++;
- getFileIds();
- } else {
- throw new Error(
- "An error occurred while retrieving files or writing data. Check logs for details."
- );
- }
- }
- /**
- * Validates the configuration settings.
- * @returns {void}
- * @private
- */
- function validateConfig_() {
- if (!CONFIG.folderId)
- throw new Error(
- "CONFIG.folderId is not set. Please specify a valid folder ID."
- );
- if (CONFIG.chunkSize <= 0)
- throw new Error("CONFIG.chunkSize must be greater than 0.");
- }
- /**
- * Logs messages with different levels.
- *
- * @param {string} message - The message to log.
- * @param {"info"|"warn"|"error"} [level="info"] - The level of the log message.
- * @returns {void}
- * @private
- */
- function log_(message, level = "info") {
- console[level](message);
- }
Advertisement
Add Comment
Please, Sign In to add comment