snegir

adwords to bq

Mar 21st, 2018
462
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 19.15 KB | None | 0 0
  1. // Copyright 2016, Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14.  
  15. /**
  16. * @name Export Data to BigQuery
  17. *
  18. * @overview The Export Data to BigQuery script sets up a BigQuery
  19. * dataset and tables, downloads a report from AdWords and then
  20. * loads the report to BigQuery.
  21. *
  22. * @author AdWords Scripts Team [[email protected]]
  23. *
  24. * @version 1.4
  25. *
  26. * @changelog
  27. * - version 1.4
  28. * - Inserts are split into <10Mb chunks.
  29. * - Compress backups to Drive.
  30. *
  31. * @changelog
  32. * - version 1.3
  33. * - Global string replace to escape quotes.
  34. *
  35. * @changelog
  36. * - version 1.2
  37. * - Global string replace to remove commas.
  38. *
  39. * @changelog
  40. * - version 1.1
  41. * - Removed commas from numbers to fix formatting issues.
  42. *
  43. * @changelog
  44. * - version 1.0
  45. * - Released initial version.
  46. */
  47.  
  48. var CONFIG = {
  49. BIGQUERY_PROJECT_ID: 'streameng-184913',
  50. BIGQUERY_DATASET_ID: 'adwords_account2',
  51.  
  52. // Truncate existing data, otherwise will append.
  53. TRUNCATE_EXISTING_DATASET: false,
  54. TRUNCATE_EXISTING_TABLES: false,
  55.  
  56. // Back up reports to Google Drive.
  57. WRITE_DATA_TO_DRIVE: false,
  58. // Folder to put all the intermediate files.
  59. DRIVE_FOLDER: 'INSERT_FOLDER_NAME',
  60.  
  61. // Default date range over which statistics fields are retrieved.
  62. DEFAULT_DATE_RANGE: 'YESTERDAY',
  63.  
  64. // Lists of reports and fields to retrieve from AdWords.
  65. REPORTS: [{NAME: 'ACCOUNT_PERFORMANCE_REPORT',
  66. CONDITIONS: '',
  67. FIELDS: {'Cost' : 'FLOAT',
  68. 'AverageCpc' : 'FLOAT',
  69. 'Ctr' : 'FLOAT',
  70. 'AveragePosition' : 'FLOAT',
  71. 'Impressions' : 'INTEGER',
  72. 'Clicks' : 'INTEGER',
  73. 'Date' : 'STRING'
  74. }
  75. }, {NAME: 'KEYWORDS_PERFORMANCE_REPORT',
  76. CONDITIONS: 'WHERE CampaignStatus = ENABLED',
  77. FIELDS: {'CampaignName' : 'STRING',
  78. 'AdGroupName' : 'STRING',
  79. 'Criteria' : 'STRING',
  80. 'Impressions' : 'INTEGER',
  81. 'Cost' : 'FLOAT',
  82. 'Clicks' : 'INTEGER',
  83. 'QualityScore' : 'FLOAT',
  84. 'Date' : 'STRING',
  85. 'AdGroupId': 'STRING',
  86. 'AdGroupName': 'STRING',
  87. 'AdGroupStatus': 'STRING',
  88. 'BidType': 'STRING',
  89. 'CampaignId': 'STRING',
  90. 'CampaignName': 'STRING',
  91. 'CampaignStatus': 'STRING',
  92. 'Id': 'STRING',
  93. 'KeywordMatchType': 'STRING',
  94. 'Status': 'STRING'
  95. }
  96. }, {NAME: 'SEARCH_QUERY_PERFORMANCE_REPORT',
  97. CONDITIONS: '',
  98. FIELDS: {'Query' : 'STRING',
  99. 'Cost' : 'FLOAT',
  100. 'AverageCpc' : 'FLOAT',
  101. 'Ctr' : 'FLOAT',
  102. 'AveragePosition' : 'FLOAT',
  103. 'Impressions' : 'INTEGER',
  104. 'Clicks' : 'INTEGER',
  105. 'Date' : 'STRING'
  106. }
  107.  
  108. }, {NAME: 'CLICK_PERFORMANCE_REPORT',
  109. CONDITIONS: 'WHERE CampaignStatus = ENABLED',
  110. FIELDS: {'AdFormat' : 'STRING',
  111. 'AdGroupId' : 'STRING',
  112. 'AdGroupName' : 'STRING',
  113. 'AdGroupStatus' : 'STRING',
  114. 'CampaignId': 'STRING',
  115. 'CampaignName' : 'STRING',
  116. 'CampaignStatus' : 'STRING',
  117. 'Clicks' : 'INTEGER',
  118. 'ClickType': 'STRING',
  119. 'CreativeId' : 'STRING',
  120. 'CriteriaId' : 'STRING',
  121. 'CriteriaParameters': 'STRING',
  122. 'Date': 'STRING',
  123. 'Device': 'STRING',
  124. 'GclId': 'STRING',
  125. 'KeywordMatchType': 'STRING',
  126. 'Page': 'STRING',
  127. 'Slot' : 'STRING'
  128. }
  129. }
  130. ],
  131.  
  132. RECIPIENT_EMAILS: [
  133. ]
  134. };
  135.  
  136. // Impose a limit on the size of BQ inserts: 10MB - 512Kb for overheads.
  137. var MAX_INSERT_SIZE = 10 * 1024 * 1024 - 512 * 1024;
  138.  
  139. /**
  140. * Main method
  141. */
  142. function main() {
  143. createDataset();
  144. for (var i = 0; i < CONFIG.REPORTS.length; i++) {
  145. var reportConfig = CONFIG.REPORTS[i];
  146. createTable(reportConfig);
  147. }
  148.  
  149. var jobIds = processReports();
  150. waitTillJobsComplete(jobIds);
  151. sendEmail(jobIds);
  152. }
  153.  
  154.  
  155. /**
  156. * Creates a new dataset.
  157. *
  158. * If a dataset with the same id already exists and the truncate flag
  159. * is set, will truncate the old dataset. If the truncate flag is not
  160. * set, then will not create a new dataset.
  161. */
  162. function createDataset() {
  163. if (datasetExists()) {
  164. if (CONFIG.TRUNCATE_EXISTING_DATASET) {
  165. BigQuery.Datasets.remove(CONFIG.BIGQUERY_PROJECT_ID,
  166. CONFIG.BIGQUERY_DATASET_ID, {'deleteContents' : true});
  167. Logger.log('Truncated dataset.');
  168. } else {
  169. Logger.log('Dataset %s already exists. Will not recreate.',
  170. CONFIG.BIGQUERY_DATASET_ID);
  171. return;
  172. }
  173. }
  174.  
  175. // Create new dataset.
  176. var dataSet = BigQuery.newDataset();
  177. dataSet.friendlyName = CONFIG.BIGQUERY_DATASET_ID;
  178. dataSet.datasetReference = BigQuery.newDatasetReference();
  179. dataSet.datasetReference.projectId = CONFIG.BIGQUERY_PROJECT_ID;
  180. dataSet.datasetReference.datasetId = CONFIG.BIGQUERY_DATASET_ID;
  181.  
  182. dataSet = BigQuery.Datasets.insert(dataSet, CONFIG.BIGQUERY_PROJECT_ID);
  183. Logger.log('Created dataset with id %s.', dataSet.id);
  184. }
  185.  
  186. /**
  187. * Checks if dataset already exists in project.
  188. *
  189. * @return {boolean} Returns true if dataset already exists.
  190. */
  191. function datasetExists() {
  192. // Get a list of all datasets in project.
  193. var datasets = BigQuery.Datasets.list(CONFIG.BIGQUERY_PROJECT_ID);
  194. var datasetExists = false;
  195. // Iterate through each dataset and check for an id match.
  196. if (datasets.datasets != null) {
  197. for (var i = 0; i < datasets.datasets.length; i++) {
  198. var dataset = datasets.datasets[i];
  199. if (dataset.datasetReference.datasetId == CONFIG.BIGQUERY_DATASET_ID) {
  200. datasetExists = true;
  201. break;
  202. }
  203. }
  204. }
  205. return datasetExists;
  206. }
  207.  
  208. /**
  209. * Creates a new table.
  210. *
  211. * If a table with the same id already exists and the truncate flag
  212. * is set, will truncate the old table. If the truncate flag is not
  213. * set, then will not create a new table.
  214. *
  215. * @param {Object} reportConfig Report configuration including report name,
  216. * conditions, and fields.
  217. */
  218. function createTable(reportConfig) {
  219. if (tableExists(reportConfig.NAME)) {
  220. if (CONFIG.TRUNCATE_EXISTING_TABLES) {
  221. BigQuery.Tables.remove(CONFIG.BIGQUERY_PROJECT_ID,
  222. CONFIG.BIGQUERY_DATASET_ID, reportConfig.NAME);
  223. Logger.log('Truncated table %s.', reportConfig.NAME);
  224. } else {
  225. Logger.log('Table %s already exists. Will not recreate.',
  226. reportConfig.NAME);
  227. return;
  228. }
  229. }
  230.  
  231. // Create new table.
  232. var table = BigQuery.newTable();
  233. var schema = BigQuery.newTableSchema();
  234. var bigQueryFields = [];
  235.  
  236. // Add each field to table schema.
  237. var fieldNames = Object.keys(reportConfig.FIELDS);
  238. for (var i = 0; i < fieldNames.length; i++) {
  239. var fieldName = fieldNames[i];
  240. var bigQueryFieldSchema = BigQuery.newTableFieldSchema();
  241. bigQueryFieldSchema.description = fieldName;
  242. bigQueryFieldSchema.name = fieldName;
  243. bigQueryFieldSchema.type = reportConfig.FIELDS[fieldName];
  244.  
  245. bigQueryFields.push(bigQueryFieldSchema);
  246. }
  247.  
  248. schema.fields = bigQueryFields;
  249. table.schema = schema;
  250. table.friendlyName = reportConfig.NAME;
  251.  
  252. table.tableReference = BigQuery.newTableReference();
  253. table.tableReference.datasetId = CONFIG.BIGQUERY_DATASET_ID;
  254. table.tableReference.projectId = CONFIG.BIGQUERY_PROJECT_ID;
  255. table.tableReference.tableId = reportConfig.NAME;
  256.  
  257. table = BigQuery.Tables.insert(table, CONFIG.BIGQUERY_PROJECT_ID,
  258. CONFIG.BIGQUERY_DATASET_ID);
  259.  
  260. Logger.log('Created table with id %s.', table.id);
  261. }
  262.  
  263. /**
  264. * Checks if table already exists in dataset.
  265. *
  266. * @param {string} tableId The table id to check existence.
  267. *
  268. * @return {boolean} Returns true if table already exists.
  269. */
  270. function tableExists(tableId) {
  271. // Get a list of all tables in the dataset.
  272. var tables = BigQuery.Tables.list(CONFIG.BIGQUERY_PROJECT_ID,
  273. CONFIG.BIGQUERY_DATASET_ID);
  274. var tableExists = false;
  275. // Iterate through each table and check for an id match.
  276. if (tables.tables != null) {
  277. for (var i = 0; i < tables.tables.length; i++) {
  278. var table = tables.tables[i];
  279. if (table.tableReference.tableId == tableId) {
  280. tableExists = true;
  281. break;
  282. }
  283. }
  284. }
  285. return tableExists;
  286. }
  287.  
  288. /**
  289. * Process all configured reports
  290. *
  291. * Iterates through each report to: retrieve AdWords data,
  292. * backup data to Drive (if configured), load data to BigQuery.
  293. *
  294. * @return {Array.<string>} jobIds The list of all job ids.
  295. */
  296. function processReports() {
  297. var jobIds = [];
  298.  
  299. // Iterate over each report type.
  300. for (var i = 0; i < CONFIG.REPORTS.length; i++) {
  301. var reportConfig = CONFIG.REPORTS[i];
  302. Logger.log('Running report %s', reportConfig.NAME);
  303. // Get data as an array of CSV chunks.
  304. var csvData = retrieveAdwordsReport(reportConfig);
  305.  
  306. // If configured, back up data.
  307. if (CONFIG.WRITE_DATA_TO_DRIVE) {
  308. var folder = getDriveFolder();
  309. for (var r = 0; r < csvData.length; r++) {
  310. var fileName = reportConfig.NAME + '_' + (r + 1);
  311. saveCompressedCsvFile(folder, fileName, csvData[r]);
  312. }
  313. Logger.log('Exported data to Drive folder %s for report %s.',
  314. CONFIG.DRIVE_FOLDER, reportConfig.NAME);
  315. }
  316.  
  317. for (var j = 0; j < csvData.length; j++) {
  318. // Convert to Blob format.
  319. var blobData = Utilities.newBlob(csvData[j], 'application/octet-stream');
  320. // Load data
  321. var jobId = loadDataToBigquery(reportConfig, blobData, !j ? 1 : 0);
  322. jobIds.push(jobId);
  323. }
  324. }
  325. return jobIds;
  326. }
  327.  
  328. /**
  329. * Writes a CSV file to Drive, compressing as a zip file.
  330. *
  331. * @param {!Folder} folder The parent folder for the file.
  332. * @param {string} fileName The name for the file.
  333. * @param {string} csvData The CSV data to write to the file.
  334. */
  335. function saveCompressedCsvFile(folder, fileName, csvData) {
  336. var compressed = Utilities.zip([Utilities.newBlob(csvData)]);
  337. compressed.setName(fileName);
  338. folder.createFile(compressed);
  339. }
  340.  
  341. /**
  342. * Retrieves AdWords data as csv and formats any fields
  343. * to BigQuery expected format.
  344. *
  345. * @param {Object} reportConfig Report configuration including report name,
  346. * conditions, and fields.
  347. *
  348. * @return {!Array.<string>} a chunked report in csv format.
  349. */
  350. function retrieveAdwordsReport(reportConfig) {
  351. var fieldNames = Object.keys(reportConfig.FIELDS);
  352. var report = AdWordsApp.report(
  353. 'SELECT ' + fieldNames.join(',') +
  354. ' FROM ' + reportConfig.NAME + ' ' + reportConfig.CONDITIONS +
  355. ' DURING ' + CONFIG.DEFAULT_DATE_RANGE);
  356. var rows = report.rows();
  357. var chunks = [];
  358. var chunkLen = 0;
  359. var csvRows = [];
  360. var totalRows = 0;
  361. // Header row
  362. var header = fieldNames.join(',');
  363. csvRows.push(header);
  364. chunkLen += Utilities.newBlob(header).getBytes().length + 1;
  365.  
  366. // Iterate over each row.
  367. while (rows.hasNext()) {
  368. var row = rows.next();
  369.  
  370. if (chunkLen > MAX_INSERT_SIZE) {
  371. chunks.push(csvRows.join('\n'));
  372. totalRows += csvRows.length;
  373. chunkLen = 0;
  374. csvRows = [];
  375. }
  376. var csvRow = [];
  377. for (var i = 0; i < fieldNames.length; i++) {
  378. var fieldName = fieldNames[i];
  379. var fieldValue = row[fieldName].toString();
  380. var fieldType = reportConfig.FIELDS[fieldName];
  381. // Strip off % and perform any other formatting here.
  382. if (fieldType == 'FLOAT' || fieldType == 'INTEGER') {
  383. if (fieldValue.charAt(fieldValue.length - 1) == '%') {
  384. fieldValue = fieldValue.substring(0, fieldValue.length - 1);
  385. }
  386. fieldValue = fieldValue.replace(/,/g,'');
  387. }
  388. // Add double quotes to any string values.
  389. if (fieldType == 'STRING') {
  390. fieldValue = fieldValue.replace(/"/g, '""');
  391. fieldValue = '"' + fieldValue + '"';
  392. }
  393. csvRow.push(fieldValue);
  394. }
  395. var rowString = csvRow.join(',');
  396. csvRows.push(rowString);
  397. chunkLen += Utilities.newBlob(rowString).getBytes().length + 1;
  398. }
  399. if (csvRows) {
  400. totalRows += csvRows.length;
  401. chunks.push(csvRows.join('\n'));
  402. }
  403. Logger.log('Downloaded ' + reportConfig.NAME + ' with ' + totalRows +
  404. ' rows, in ' + chunks.length + ' chunks.');
  405. return chunks;
  406. }
  407.  
  408. /**
  409. * Creates a new Google Drive folder. If folder name is already in
  410. * use will pick the first folder with a matching name.
  411. *
  412. * @return {Folder} Google Drive folder to store reports.
  413. */
  414. function getDriveFolder() {
  415. var folders = DriveApp.getFoldersByName(CONFIG.DRIVE_FOLDER);
  416. // Assume first folder is the correct one.
  417. if (folders.hasNext()) {
  418. Logger.log('Folder name found. Using existing folder.');
  419. return folders.next();
  420. }
  421. return DriveApp.createFolder(CONFIG.DRIVE_FOLDER);
  422. }
  423.  
  424. /**
  425. * Creates a BigQuery insertJob to load csv data.
  426. *
  427. * @param {Object} reportConfig Report configuration including report name,
  428. * conditions, and fields.
  429. * @param {Blob} data Csv report data as an 'application/octet-stream' blob.
  430. * @param {number=} skipLeadingRows Optional number of rows to skip.
  431. *
  432. * @return {string} jobId The job id for upload.
  433. */
  434. function loadDataToBigquery(reportConfig, data, skipLeadingRows) {
  435. // Create the data upload job.
  436. var job = {
  437. configuration: {
  438. load: {
  439. destinationTable: {
  440. projectId: CONFIG.BIGQUERY_PROJECT_ID,
  441. datasetId: CONFIG.BIGQUERY_DATASET_ID,
  442. tableId: reportConfig.NAME
  443. },
  444. skipLeadingRows: skipLeadingRows ? skipLeadingRows : 0,
  445. nullMarker: '--'
  446. }
  447. }
  448. };
  449.  
  450. var insertJob = BigQuery.Jobs.insert(job, CONFIG.BIGQUERY_PROJECT_ID, data);
  451. Logger.log('Load job started for %s. Check on the status of it here: ' +
  452. 'https://bigquery.cloud.google.com/jobs/%s', reportConfig.NAME,
  453. CONFIG.BIGQUERY_PROJECT_ID);
  454. return insertJob.jobReference.jobId;
  455. }
  456.  
  457. /**
  458. * Polls until all jobs are 'DONE'.
  459. *
  460. * @param {Array.<string>} jobIds The list of all job ids.
  461. */
  462. function waitTillJobsComplete(jobIds) {
  463. var complete = false;
  464. var remainingJobs = jobIds;
  465. while (!complete) {
  466. if (AdWordsApp.getExecutionInfo().getRemainingTime() < 5){
  467. Logger.log('Script is about to timeout, jobs ' + remainingJobs.join(',') +
  468. ' are still incomplete.');
  469. }
  470. remainingJobs = getIncompleteJobs(remainingJobs);
  471. if (remainingJobs.length == 0) {
  472. complete = true;
  473. }
  474. if (!complete) {
  475. Logger.log(remainingJobs.length + ' jobs still being processed.');
  476. // Wait 5 seconds before checking status again.
  477. Utilities.sleep(5000);
  478. }
  479. }
  480. Logger.log('All jobs processed.');
  481. }
  482.  
  483. /**
  484. * Iterates through jobs and returns the ids for those jobs
  485. * that are not 'DONE'.
  486. *
  487. * @param {Array.<string>} jobIds The list of job ids.
  488. *
  489. * @return {Array.<string>} remainingJobIds The list of remaining job ids.
  490. */
  491. function getIncompleteJobs(jobIds) {
  492. var remainingJobIds = [];
  493. for (var i = 0; i < jobIds.length; i++) {
  494. var jobId = jobIds[i];
  495. var getJob = BigQuery.Jobs.get(CONFIG.BIGQUERY_PROJECT_ID, jobId);
  496. if (getJob.status.state != 'DONE') {
  497. remainingJobIds.push(jobId);
  498. }
  499. }
  500. return remainingJobIds;
  501. }
  502.  
  503.  
  504. /**
  505. * Sends a notification email that jobs have completed loading.
  506. *
  507. * @param {Array.<string>} jobIds The list of all job ids.
  508. */
  509. function sendEmail(jobIds) {
  510. var html = [];
  511. html.push(
  512. '<html>',
  513. '<body>',
  514. '<table width=800 cellpadding=0 border=0 cellspacing=0>',
  515. '<tr>',
  516. '<td colspan=2 align=right>',
  517. "<div style='font: italic normal 10pt Times New Roman, serif; " +
  518. "margin: 0; color: #666; padding-right: 5px;'>" +
  519. 'Powered by AdWords Scripts</div>',
  520. '</td>',
  521. '</tr>',
  522. "<tr bgcolor='#3c78d8'>",
  523. '<td width=500>',
  524. "<div style='font: normal 18pt verdana, sans-serif; " +
  525. "padding: 3px 10px; color: white'>Adwords data load to " +
  526. "Bigquery report</div>",
  527. '</td>',
  528. '<td align=right>',
  529. "<div style='font: normal 18pt verdana, sans-serif; " +
  530. "padding: 3px 10px; color: white'>",
  531. AdWordsApp.currentAccount().getCustomerId(),
  532. '</tr>',
  533. '</table>',
  534. '<table width=800 cellpadding=0 border=1 cellspacing=0>',
  535. "<tr bgcolor='#ddd'>",
  536. "<td style='font: 12pt verdana, sans-serif; " +
  537. 'padding: 5px 0px 5px 5px; background-color: #ddd; ' +
  538. "text-align: left'>Report</td>",
  539. "<td style='font: 12pt verdana, sans-serif; " +
  540. 'padding: 5px 0px 5px 5px; background-color: #ddd; ' +
  541. "text-align: left'>JobId</td>",
  542. "<td style='font: 12pt verdana, sans-serif; " +
  543. 'padding: 5px 0px 5x 5px; background-color: #ddd; ' +
  544. "text-align: left'>Rows</td>",
  545. "<td style='font: 12pt verdana, sans-serif; " +
  546. 'padding: 5px 0px 5x 5px; background-color: #ddd; ' +
  547. "text-align: left'>State</td>",
  548. "<td style='font: 12pt verdana, sans-serif; " +
  549. 'padding: 5px 0px 5x 5px; background-color: #ddd; ' +
  550. "text-align: left'>ErrorResult</td>",
  551. '</tr>',
  552. createTableRows(jobIds),
  553. '</table>',
  554. '</body>',
  555. '</html>');
  556.  
  557. MailApp.sendEmail(CONFIG.RECIPIENT_EMAILS.join(','),
  558. 'Adwords data load to Bigquery Complete', '',
  559. {htmlBody: html.join('\n')});
  560. }
  561.  
  562. /**
  563. * Creates table rows for email report.
  564. *
  565. * @param {Array.<string>} jobIds The list of all job ids.
  566. */
  567. function createTableRows(jobIds) {
  568. var html = [];
  569. for (var i = 0; i < jobIds.length; i++) {
  570. var jobId = jobIds[i];
  571. var job = BigQuery.Jobs.get(CONFIG.BIGQUERY_PROJECT_ID, jobId);
  572. var errorResult = '';
  573. if (job.status.errorResult) {
  574. errorResult = job.status.errorResult;
  575. }
  576.  
  577. html.push('<tr>',
  578. "<td style='padding: 0px 10px'>" +
  579. job.configuration.load.destinationTable.tableId + '</td>',
  580. "<td style='padding: 0px 10px'>" + jobId + '</td>',
  581. "<td style='padding: 0px 10px'>" +
  582. (job.statistics.load ? job.statistics.load.outputRows : 0) +'</td>',
  583. "<td style='padding: 0px 10px'>" + job.status.state + '</td>',
  584. "<td style='padding: 0px 10px'>" + errorResult + '</td>',
  585. '</tr>');
  586. }
  587. return html.join('\n');
  588. }
Advertisement
Add Comment
Please, Sign In to add comment