Advertisement
snegir

adwords to bq

Mar 21st, 2018
416
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 19.15 KB | None | 0 0
  1. // Copyright 2016, Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14.  
  15. /**
  16. * @name Export Data to BigQuery
  17. *
  18. * @overview The Export Data to BigQuery script sets up a BigQuery
  19. * dataset and tables, downloads a report from AdWords and then
  20. * loads the report to BigQuery.
  21. *
  22. * @author AdWords Scripts Team [adwords-scripts@googlegroups.com]
  23. *
  24. * @version 1.4
  25. *
  26. * @changelog
  27. * - version 1.4
  28. * - Inserts are split into <10Mb chunks.
  29. * - Compress backups to Drive.
  30. *
  31. * @changelog
  32. * - version 1.3
  33. * - Global string replace to escape quotes.
  34. *
  35. * @changelog
  36. * - version 1.2
  37. * - Global string replace to remove commas.
  38. *
  39. * @changelog
  40. * - version 1.1
  41. * - Removed commas from numbers to fix formatting issues.
  42. *
  43. * @changelog
  44. * - version 1.0
  45. * - Released initial version.
  46. */
  47.  
  48. var CONFIG = {
  49. BIGQUERY_PROJECT_ID: 'streameng-184913',
  50. BIGQUERY_DATASET_ID: 'adwords_account2',
  51.  
  52. // Truncate existing data, otherwise will append.
  53. TRUNCATE_EXISTING_DATASET: false,
  54. TRUNCATE_EXISTING_TABLES: false,
  55.  
  56. // Back up reports to Google Drive.
  57. WRITE_DATA_TO_DRIVE: false,
  58. // Folder to put all the intermediate files.
  59. DRIVE_FOLDER: 'INSERT_FOLDER_NAME',
  60.  
  61. // Default date range over which statistics fields are retrieved.
  62. DEFAULT_DATE_RANGE: 'YESTERDAY',
  63.  
  64. // Lists of reports and fields to retrieve from AdWords.
  65. REPORTS: [{NAME: 'ACCOUNT_PERFORMANCE_REPORT',
  66. CONDITIONS: '',
  67. FIELDS: {'Cost' : 'FLOAT',
  68. 'AverageCpc' : 'FLOAT',
  69. 'Ctr' : 'FLOAT',
  70. 'AveragePosition' : 'FLOAT',
  71. 'Impressions' : 'INTEGER',
  72. 'Clicks' : 'INTEGER',
  73. 'Date' : 'STRING'
  74. }
  75. }, {NAME: 'KEYWORDS_PERFORMANCE_REPORT',
  76. CONDITIONS: 'WHERE CampaignStatus = ENABLED',
  77. FIELDS: {'CampaignName' : 'STRING',
  78. 'AdGroupName' : 'STRING',
  79. 'Criteria' : 'STRING',
  80. 'Impressions' : 'INTEGER',
  81. 'Cost' : 'FLOAT',
  82. 'Clicks' : 'INTEGER',
  83. 'QualityScore' : 'FLOAT',
  84. 'Date' : 'STRING',
  85. 'AdGroupId': 'STRING',
  86. 'AdGroupName': 'STRING',
  87. 'AdGroupStatus': 'STRING',
  88. 'BidType': 'STRING',
  89. 'CampaignId': 'STRING',
  90. 'CampaignName': 'STRING',
  91. 'CampaignStatus': 'STRING',
  92. 'Id': 'STRING',
  93. 'KeywordMatchType': 'STRING',
  94. 'Status': 'STRING'
  95. }
  96. }, {NAME: 'SEARCH_QUERY_PERFORMANCE_REPORT',
  97. CONDITIONS: '',
  98. FIELDS: {'Query' : 'STRING',
  99. 'Cost' : 'FLOAT',
  100. 'AverageCpc' : 'FLOAT',
  101. 'Ctr' : 'FLOAT',
  102. 'AveragePosition' : 'FLOAT',
  103. 'Impressions' : 'INTEGER',
  104. 'Clicks' : 'INTEGER',
  105. 'Date' : 'STRING'
  106. }
  107.  
  108. }, {NAME: 'CLICK_PERFORMANCE_REPORT',
  109. CONDITIONS: 'WHERE CampaignStatus = ENABLED',
  110. FIELDS: {'AdFormat' : 'STRING',
  111. 'AdGroupId' : 'STRING',
  112. 'AdGroupName' : 'STRING',
  113. 'AdGroupStatus' : 'STRING',
  114. 'CampaignId': 'STRING',
  115. 'CampaignName' : 'STRING',
  116. 'CampaignStatus' : 'STRING',
  117. 'Clicks' : 'INTEGER',
  118. 'ClickType': 'STRING',
  119. 'CreativeId' : 'STRING',
  120. 'CriteriaId' : 'STRING',
  121. 'CriteriaParameters': 'STRING',
  122. 'Date': 'STRING',
  123. 'Device': 'STRING',
  124. 'GclId': 'STRING',
  125. 'KeywordMatchType': 'STRING',
  126. 'Page': 'STRING',
  127. 'Slot' : 'STRING'
  128. }
  129. }
  130. ],
  131.  
  132. RECIPIENT_EMAILS: [
  133. 's@gmail.com'
  134. ]
  135. };
  136.  
  137. // Impose a limit on the size of BQ inserts: 10MB - 512Kb for overheads.
  138. var MAX_INSERT_SIZE = 10 * 1024 * 1024 - 512 * 1024;
  139.  
  140. /**
  141. * Main method
  142. */
  143. function main() {
  144. createDataset();
  145. for (var i = 0; i < CONFIG.REPORTS.length; i++) {
  146. var reportConfig = CONFIG.REPORTS[i];
  147. createTable(reportConfig);
  148. }
  149.  
  150. var jobIds = processReports();
  151. waitTillJobsComplete(jobIds);
  152. sendEmail(jobIds);
  153. }
  154.  
  155.  
  156. /**
  157. * Creates a new dataset.
  158. *
  159. * If a dataset with the same id already exists and the truncate flag
  160. * is set, will truncate the old dataset. If the truncate flag is not
  161. * set, then will not create a new dataset.
  162. */
  163. function createDataset() {
  164. if (datasetExists()) {
  165. if (CONFIG.TRUNCATE_EXISTING_DATASET) {
  166. BigQuery.Datasets.remove(CONFIG.BIGQUERY_PROJECT_ID,
  167. CONFIG.BIGQUERY_DATASET_ID, {'deleteContents' : true});
  168. Logger.log('Truncated dataset.');
  169. } else {
  170. Logger.log('Dataset %s already exists. Will not recreate.',
  171. CONFIG.BIGQUERY_DATASET_ID);
  172. return;
  173. }
  174. }
  175.  
  176. // Create new dataset.
  177. var dataSet = BigQuery.newDataset();
  178. dataSet.friendlyName = CONFIG.BIGQUERY_DATASET_ID;
  179. dataSet.datasetReference = BigQuery.newDatasetReference();
  180. dataSet.datasetReference.projectId = CONFIG.BIGQUERY_PROJECT_ID;
  181. dataSet.datasetReference.datasetId = CONFIG.BIGQUERY_DATASET_ID;
  182.  
  183. dataSet = BigQuery.Datasets.insert(dataSet, CONFIG.BIGQUERY_PROJECT_ID);
  184. Logger.log('Created dataset with id %s.', dataSet.id);
  185. }
  186.  
  187. /**
  188. * Checks if dataset already exists in project.
  189. *
  190. * @return {boolean} Returns true if dataset already exists.
  191. */
  192. function datasetExists() {
  193. // Get a list of all datasets in project.
  194. var datasets = BigQuery.Datasets.list(CONFIG.BIGQUERY_PROJECT_ID);
  195. var datasetExists = false;
  196. // Iterate through each dataset and check for an id match.
  197. if (datasets.datasets != null) {
  198. for (var i = 0; i < datasets.datasets.length; i++) {
  199. var dataset = datasets.datasets[i];
  200. if (dataset.datasetReference.datasetId == CONFIG.BIGQUERY_DATASET_ID) {
  201. datasetExists = true;
  202. break;
  203. }
  204. }
  205. }
  206. return datasetExists;
  207. }
  208.  
  209. /**
  210. * Creates a new table.
  211. *
  212. * If a table with the same id already exists and the truncate flag
  213. * is set, will truncate the old table. If the truncate flag is not
  214. * set, then will not create a new table.
  215. *
  216. * @param {Object} reportConfig Report configuration including report name,
  217. * conditions, and fields.
  218. */
  219. function createTable(reportConfig) {
  220. if (tableExists(reportConfig.NAME)) {
  221. if (CONFIG.TRUNCATE_EXISTING_TABLES) {
  222. BigQuery.Tables.remove(CONFIG.BIGQUERY_PROJECT_ID,
  223. CONFIG.BIGQUERY_DATASET_ID, reportConfig.NAME);
  224. Logger.log('Truncated table %s.', reportConfig.NAME);
  225. } else {
  226. Logger.log('Table %s already exists. Will not recreate.',
  227. reportConfig.NAME);
  228. return;
  229. }
  230. }
  231.  
  232. // Create new table.
  233. var table = BigQuery.newTable();
  234. var schema = BigQuery.newTableSchema();
  235. var bigQueryFields = [];
  236.  
  237. // Add each field to table schema.
  238. var fieldNames = Object.keys(reportConfig.FIELDS);
  239. for (var i = 0; i < fieldNames.length; i++) {
  240. var fieldName = fieldNames[i];
  241. var bigQueryFieldSchema = BigQuery.newTableFieldSchema();
  242. bigQueryFieldSchema.description = fieldName;
  243. bigQueryFieldSchema.name = fieldName;
  244. bigQueryFieldSchema.type = reportConfig.FIELDS[fieldName];
  245.  
  246. bigQueryFields.push(bigQueryFieldSchema);
  247. }
  248.  
  249. schema.fields = bigQueryFields;
  250. table.schema = schema;
  251. table.friendlyName = reportConfig.NAME;
  252.  
  253. table.tableReference = BigQuery.newTableReference();
  254. table.tableReference.datasetId = CONFIG.BIGQUERY_DATASET_ID;
  255. table.tableReference.projectId = CONFIG.BIGQUERY_PROJECT_ID;
  256. table.tableReference.tableId = reportConfig.NAME;
  257.  
  258. table = BigQuery.Tables.insert(table, CONFIG.BIGQUERY_PROJECT_ID,
  259. CONFIG.BIGQUERY_DATASET_ID);
  260.  
  261. Logger.log('Created table with id %s.', table.id);
  262. }
  263.  
  264. /**
  265. * Checks if table already exists in dataset.
  266. *
  267. * @param {string} tableId The table id to check existence.
  268. *
  269. * @return {boolean} Returns true if table already exists.
  270. */
  271. function tableExists(tableId) {
  272. // Get a list of all tables in the dataset.
  273. var tables = BigQuery.Tables.list(CONFIG.BIGQUERY_PROJECT_ID,
  274. CONFIG.BIGQUERY_DATASET_ID);
  275. var tableExists = false;
  276. // Iterate through each table and check for an id match.
  277. if (tables.tables != null) {
  278. for (var i = 0; i < tables.tables.length; i++) {
  279. var table = tables.tables[i];
  280. if (table.tableReference.tableId == tableId) {
  281. tableExists = true;
  282. break;
  283. }
  284. }
  285. }
  286. return tableExists;
  287. }
  288.  
  289. /**
  290. * Process all configured reports
  291. *
  292. * Iterates through each report to: retrieve AdWords data,
  293. * backup data to Drive (if configured), load data to BigQuery.
  294. *
  295. * @return {Array.<string>} jobIds The list of all job ids.
  296. */
  297. function processReports() {
  298. var jobIds = [];
  299.  
  300. // Iterate over each report type.
  301. for (var i = 0; i < CONFIG.REPORTS.length; i++) {
  302. var reportConfig = CONFIG.REPORTS[i];
  303. Logger.log('Running report %s', reportConfig.NAME);
  304. // Get data as an array of CSV chunks.
  305. var csvData = retrieveAdwordsReport(reportConfig);
  306.  
  307. // If configured, back up data.
  308. if (CONFIG.WRITE_DATA_TO_DRIVE) {
  309. var folder = getDriveFolder();
  310. for (var r = 0; r < csvData.length; r++) {
  311. var fileName = reportConfig.NAME + '_' + (r + 1);
  312. saveCompressedCsvFile(folder, fileName, csvData[r]);
  313. }
  314. Logger.log('Exported data to Drive folder %s for report %s.',
  315. CONFIG.DRIVE_FOLDER, reportConfig.NAME);
  316. }
  317.  
  318. for (var j = 0; j < csvData.length; j++) {
  319. // Convert to Blob format.
  320. var blobData = Utilities.newBlob(csvData[j], 'application/octet-stream');
  321. // Load data
  322. var jobId = loadDataToBigquery(reportConfig, blobData, !j ? 1 : 0);
  323. jobIds.push(jobId);
  324. }
  325. }
  326. return jobIds;
  327. }
  328.  
  329. /**
  330. * Writes a CSV file to Drive, compressing as a zip file.
  331. *
  332. * @param {!Folder} folder The parent folder for the file.
  333. * @param {string} fileName The name for the file.
  334. * @param {string} csvData The CSV data to write to the file.
  335. */
  336. function saveCompressedCsvFile(folder, fileName, csvData) {
  337. var compressed = Utilities.zip([Utilities.newBlob(csvData)]);
  338. compressed.setName(fileName);
  339. folder.createFile(compressed);
  340. }
  341.  
  342. /**
  343. * Retrieves AdWords data as csv and formats any fields
  344. * to BigQuery expected format.
  345. *
  346. * @param {Object} reportConfig Report configuration including report name,
  347. * conditions, and fields.
  348. *
  349. * @return {!Array.<string>} a chunked report in csv format.
  350. */
  351. function retrieveAdwordsReport(reportConfig) {
  352. var fieldNames = Object.keys(reportConfig.FIELDS);
  353. var report = AdWordsApp.report(
  354. 'SELECT ' + fieldNames.join(',') +
  355. ' FROM ' + reportConfig.NAME + ' ' + reportConfig.CONDITIONS +
  356. ' DURING ' + CONFIG.DEFAULT_DATE_RANGE);
  357. var rows = report.rows();
  358. var chunks = [];
  359. var chunkLen = 0;
  360. var csvRows = [];
  361. var totalRows = 0;
  362. // Header row
  363. var header = fieldNames.join(',');
  364. csvRows.push(header);
  365. chunkLen += Utilities.newBlob(header).getBytes().length + 1;
  366.  
  367. // Iterate over each row.
  368. while (rows.hasNext()) {
  369. var row = rows.next();
  370.  
  371. if (chunkLen > MAX_INSERT_SIZE) {
  372. chunks.push(csvRows.join('\n'));
  373. totalRows += csvRows.length;
  374. chunkLen = 0;
  375. csvRows = [];
  376. }
  377. var csvRow = [];
  378. for (var i = 0; i < fieldNames.length; i++) {
  379. var fieldName = fieldNames[i];
  380. var fieldValue = row[fieldName].toString();
  381. var fieldType = reportConfig.FIELDS[fieldName];
  382. // Strip off % and perform any other formatting here.
  383. if (fieldType == 'FLOAT' || fieldType == 'INTEGER') {
  384. if (fieldValue.charAt(fieldValue.length - 1) == '%') {
  385. fieldValue = fieldValue.substring(0, fieldValue.length - 1);
  386. }
  387. fieldValue = fieldValue.replace(/,/g,'');
  388. }
  389. // Add double quotes to any string values.
  390. if (fieldType == 'STRING') {
  391. fieldValue = fieldValue.replace(/"/g, '""');
  392. fieldValue = '"' + fieldValue + '"';
  393. }
  394. csvRow.push(fieldValue);
  395. }
  396. var rowString = csvRow.join(',');
  397. csvRows.push(rowString);
  398. chunkLen += Utilities.newBlob(rowString).getBytes().length + 1;
  399. }
  400. if (csvRows) {
  401. totalRows += csvRows.length;
  402. chunks.push(csvRows.join('\n'));
  403. }
  404. Logger.log('Downloaded ' + reportConfig.NAME + ' with ' + totalRows +
  405. ' rows, in ' + chunks.length + ' chunks.');
  406. return chunks;
  407. }
  408.  
  409. /**
  410. * Creates a new Google Drive folder. If folder name is already in
  411. * use will pick the first folder with a matching name.
  412. *
  413. * @return {Folder} Google Drive folder to store reports.
  414. */
  415. function getDriveFolder() {
  416. var folders = DriveApp.getFoldersByName(CONFIG.DRIVE_FOLDER);
  417. // Assume first folder is the correct one.
  418. if (folders.hasNext()) {
  419. Logger.log('Folder name found. Using existing folder.');
  420. return folders.next();
  421. }
  422. return DriveApp.createFolder(CONFIG.DRIVE_FOLDER);
  423. }
  424.  
  425. /**
  426. * Creates a BigQuery insertJob to load csv data.
  427. *
  428. * @param {Object} reportConfig Report configuration including report name,
  429. * conditions, and fields.
  430. * @param {Blob} data Csv report data as an 'application/octet-stream' blob.
  431. * @param {number=} skipLeadingRows Optional number of rows to skip.
  432. *
  433. * @return {string} jobId The job id for upload.
  434. */
  435. function loadDataToBigquery(reportConfig, data, skipLeadingRows) {
  436. // Create the data upload job.
  437. var job = {
  438. configuration: {
  439. load: {
  440. destinationTable: {
  441. projectId: CONFIG.BIGQUERY_PROJECT_ID,
  442. datasetId: CONFIG.BIGQUERY_DATASET_ID,
  443. tableId: reportConfig.NAME
  444. },
  445. skipLeadingRows: skipLeadingRows ? skipLeadingRows : 0,
  446. nullMarker: '--'
  447. }
  448. }
  449. };
  450.  
  451. var insertJob = BigQuery.Jobs.insert(job, CONFIG.BIGQUERY_PROJECT_ID, data);
  452. Logger.log('Load job started for %s. Check on the status of it here: ' +
  453. 'https://bigquery.cloud.google.com/jobs/%s', reportConfig.NAME,
  454. CONFIG.BIGQUERY_PROJECT_ID);
  455. return insertJob.jobReference.jobId;
  456. }
  457.  
  458. /**
  459. * Polls until all jobs are 'DONE'.
  460. *
  461. * @param {Array.<string>} jobIds The list of all job ids.
  462. */
  463. function waitTillJobsComplete(jobIds) {
  464. var complete = false;
  465. var remainingJobs = jobIds;
  466. while (!complete) {
  467. if (AdWordsApp.getExecutionInfo().getRemainingTime() < 5){
  468. Logger.log('Script is about to timeout, jobs ' + remainingJobs.join(',') +
  469. ' are still incomplete.');
  470. }
  471. remainingJobs = getIncompleteJobs(remainingJobs);
  472. if (remainingJobs.length == 0) {
  473. complete = true;
  474. }
  475. if (!complete) {
  476. Logger.log(remainingJobs.length + ' jobs still being processed.');
  477. // Wait 5 seconds before checking status again.
  478. Utilities.sleep(5000);
  479. }
  480. }
  481. Logger.log('All jobs processed.');
  482. }
  483.  
  484. /**
  485. * Iterates through jobs and returns the ids for those jobs
  486. * that are not 'DONE'.
  487. *
  488. * @param {Array.<string>} jobIds The list of job ids.
  489. *
  490. * @return {Array.<string>} remainingJobIds The list of remaining job ids.
  491. */
  492. function getIncompleteJobs(jobIds) {
  493. var remainingJobIds = [];
  494. for (var i = 0; i < jobIds.length; i++) {
  495. var jobId = jobIds[i];
  496. var getJob = BigQuery.Jobs.get(CONFIG.BIGQUERY_PROJECT_ID, jobId);
  497. if (getJob.status.state != 'DONE') {
  498. remainingJobIds.push(jobId);
  499. }
  500. }
  501. return remainingJobIds;
  502. }
  503.  
  504.  
  505. /**
  506. * Sends a notification email that jobs have completed loading.
  507. *
  508. * @param {Array.<string>} jobIds The list of all job ids.
  509. */
  510. function sendEmail(jobIds) {
  511. var html = [];
  512. html.push(
  513. '<html>',
  514. '<body>',
  515. '<table width=800 cellpadding=0 border=0 cellspacing=0>',
  516. '<tr>',
  517. '<td colspan=2 align=right>',
  518. "<div style='font: italic normal 10pt Times New Roman, serif; " +
  519. "margin: 0; color: #666; padding-right: 5px;'>" +
  520. 'Powered by AdWords Scripts</div>',
  521. '</td>',
  522. '</tr>',
  523. "<tr bgcolor='#3c78d8'>",
  524. '<td width=500>',
  525. "<div style='font: normal 18pt verdana, sans-serif; " +
  526. "padding: 3px 10px; color: white'>Adwords data load to " +
  527. "Bigquery report</div>",
  528. '</td>',
  529. '<td align=right>',
  530. "<div style='font: normal 18pt verdana, sans-serif; " +
  531. "padding: 3px 10px; color: white'>",
  532. AdWordsApp.currentAccount().getCustomerId(),
  533. '</tr>',
  534. '</table>',
  535. '<table width=800 cellpadding=0 border=1 cellspacing=0>',
  536. "<tr bgcolor='#ddd'>",
  537. "<td style='font: 12pt verdana, sans-serif; " +
  538. 'padding: 5px 0px 5px 5px; background-color: #ddd; ' +
  539. "text-align: left'>Report</td>",
  540. "<td style='font: 12pt verdana, sans-serif; " +
  541. 'padding: 5px 0px 5px 5px; background-color: #ddd; ' +
  542. "text-align: left'>JobId</td>",
  543. "<td style='font: 12pt verdana, sans-serif; " +
  544. 'padding: 5px 0px 5x 5px; background-color: #ddd; ' +
  545. "text-align: left'>Rows</td>",
  546. "<td style='font: 12pt verdana, sans-serif; " +
  547. 'padding: 5px 0px 5x 5px; background-color: #ddd; ' +
  548. "text-align: left'>State</td>",
  549. "<td style='font: 12pt verdana, sans-serif; " +
  550. 'padding: 5px 0px 5x 5px; background-color: #ddd; ' +
  551. "text-align: left'>ErrorResult</td>",
  552. '</tr>',
  553. createTableRows(jobIds),
  554. '</table>',
  555. '</body>',
  556. '</html>');
  557.  
  558. MailApp.sendEmail(CONFIG.RECIPIENT_EMAILS.join(','),
  559. 'Adwords data load to Bigquery Complete', '',
  560. {htmlBody: html.join('\n')});
  561. }
  562.  
  563. /**
  564. * Creates table rows for email report.
  565. *
  566. * @param {Array.<string>} jobIds The list of all job ids.
  567. */
  568. function createTableRows(jobIds) {
  569. var html = [];
  570. for (var i = 0; i < jobIds.length; i++) {
  571. var jobId = jobIds[i];
  572. var job = BigQuery.Jobs.get(CONFIG.BIGQUERY_PROJECT_ID, jobId);
  573. var errorResult = '';
  574. if (job.status.errorResult) {
  575. errorResult = job.status.errorResult;
  576. }
  577.  
  578. html.push('<tr>',
  579. "<td style='padding: 0px 10px'>" +
  580. job.configuration.load.destinationTable.tableId + '</td>',
  581. "<td style='padding: 0px 10px'>" + jobId + '</td>',
  582. "<td style='padding: 0px 10px'>" +
  583. (job.statistics.load ? job.statistics.load.outputRows : 0) +'</td>',
  584. "<td style='padding: 0px 10px'>" + job.status.state + '</td>',
  585. "<td style='padding: 0px 10px'>" + errorResult + '</td>',
  586. '</tr>');
  587. }
  588. return html.join('\n');
  589. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement