Advertisement
Guest User

Untitled

a guest
Sep 28th, 2016
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.21 KB | None | 0 0
  1.  
  #' Check a list of URLs for existence of the Google Tag Manager Container code
  #'
  #' Downloads every page listed in `urlFile`, parses the HTML and, for each
  #' container ID, reports whether the ID is referenced by a GTM `ns.html`
  #' iframe and whether it appears in the text of any `<script>` element.
  #'
  #' @param containers
  #' A character vector of the container IDs for Google Tag Manager
  #' @param urlFile
  #' A character vector providing the path to a csv file with 2 columns: url,
  #' the full url to the page to be checked; country, the country where the URL
  #' is hosted. The url column is required and is looked up by name.
  #'
  #' @param outputdir
  #' A character vector providing the path to the desired output directory for
  #' the csv of results. If not provided, the csv will be output to the current
  #' working directory. The directory is created if it does not exist.
  #'
  #' @return
  #' Invisibly returns the data frame of results. As a side effect, writes a
  #' csv to outputdir with several columns: url, the url provided in the
  #' urlFile csv; country, the country provided in the urlFile csv. After
  #' these there are 2 columns for each GTM container ID supplied in
  #' containers: one confirming whether the snippet is present in an iframe,
  #' and a second confirming whether it is present in a script tag. Each status
  #' is "OK" or "Not found", or NA when the page could not be fetched or
  #' parsed (a warning is raised for every such page).
  #' @export
  #'
  #' @examples
  #' \dontrun{
  #' check_gtm(
  #'   containers = c("GTM-TT98938", "GTM-FFD99D"),
  #'   urlFile = "downloads/urlFile.csv"
  #' )
  #' }
  check_gtm <- function(containers, urlFile, outputdir = getwd()) {

    # Ensure the HTTP client and HTML parser are available. A missing package
    # is still installed on demand, but it is re-checked afterwards and all
    # calls are namespace-qualified, so a fresh install works on the same run.
    for (pkg in c("httr", "XML")) {
      if (!requireNamespace(pkg, quietly = TRUE)) {
        utils::install.packages(pkg)
      }
      if (!requireNamespace(pkg, quietly = TRUE)) {
        stop(
          "Package '", pkg, "' is required but could not be installed.",
          call. = FALSE
        )
      }
    }

    url_table <- utils::read.csv(urlFile, stringsAsFactors = FALSE)
    if (!"url" %in% names(url_table)) {
      stop("`urlFile` must contain a column named 'url'.", call. = FALSE)
    }
    urls <- url_table[["url"]]
    # NULL when the file has no country column; data.frame() then simply
    # leaves the column out of the report.
    countries <- url_table[["country"]]

    # Translate a logical "was it found" flag into the report's status text.
    status_label <- function(found) {
      if (found) "OK" else "Not found"
    }

    # Check for the container ID in an iframe. The src must be exactly the GTM
    # ns.html URL for this ID, in protocol-relative, http or https form.
    iframe_checker <- function(iframe_src, tag_id) {
      target <- paste0("//www.googletagmanager.com/ns.html?id=", tag_id)
      status_label(any(iframe_src %in% paste0(c("", "http:", "https:"), target)))
    }

    # Check for the container ID in script text. The ID is matched literally
    # (fixed = TRUE) so it is never interpreted as a regular expression.
    script_checker <- function(script_text, tag_id) {
      status_label(any(grepl(tag_id, script_text, fixed = TRUE)))
    }

    # Download and parse one page. Returns NULL (with a warning) on failure so
    # that one bad URL does not abort the whole run.
    fetch_page <- function(page_url) {
      tryCatch(
        {
          response <- httr::GET(page_url)
          body <- httr::content(response, as = "text")
          XML::htmlParse(body, asText = TRUE)
        },
        error = function(e) {
          warning(
            "Could not fetch or parse ", page_url, ": ", conditionMessage(e),
            call. = FALSE
          )
          NULL
        }
      )
    }

    # One single-row data frame per URL, bound together once after the loop.
    rows <- vector("list", length(urls))

    for (i in seq_along(urls)) {
      message(paste0("Checking url number ", i, " of ", length(urls)))
      url <- urls[i]
      row <- data.frame(
        url = url,
        country = countries[i],
        stringsAsFactors = FALSE
      )

      doc <- fetch_page(url)
      if (!is.null(doc)) {
        # Extract the page's iframe sources and script bodies once; they are
        # reused for every container.
        iframe_src <- as.character(unlist(
          XML::xpathApply(doc, "//iframe[@src]", XML::xmlGetAttr, "src")
        ))
        script_text <- as.character(unlist(
          XML::xpathApply(doc, "//script", XML::xmlValue)
        ))
      }

      # Loop through containers and check for GTM in iframe or script tags
      for (container in containers) {
        if (is.null(doc)) {
          iframe_status <- NA_character_
          script_status <- NA_character_
        } else {
          iframe_status <- iframe_checker(iframe_src, container)
          script_status <- script_checker(script_text, container)
        }
        row[[paste0(container, "-iFrame")]] <- iframe_status
        row[[paste0(container, "-script")]] <- script_status
      }

      rows[[i]] <- row
    }

    resultsFrame <- do.call(rbind, rows)

    if (!dir.exists(outputdir)) {
      dir.create(outputdir, recursive = TRUE)
    }

    # Build the path once so the file written and the path reported agree.
    output_path <- file.path(
      outputdir,
      paste0("GTM check Full Results-", Sys.Date(), ".csv")
    )
    utils::write.csv(resultsFrame, file = output_path)
    message(paste0(
      "GTM check complete. Results have been written to:\n", output_path
    ))

    invisible(resultsFrame)
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement