1. ##### Initialization
  2. cat("Initializing..\n")
  3. rm(list=ls())
  4. test=function() {source("D:\\Jeff\\modis\\script\\downloadMODIS_V1B5.r")}
  5.  
  6. ### change defaults
  7. options(download.file.method="auto")
  8. options(stringsAsFactors=FALSE)
  9.  
  10. ### set user params
  11. # computer params
  12. inpPath="D:\\Jeff\\modis\\inputs\\brisbaneCoord.txt"
  13. wDir="D:\\Jeff\\modis\\temp"
  14. expTifDir="D:\\Jeff\\modis\\outputs\\Geo_TIFF"
  15. expJpgDir="D:\\Jeff\\modis\\outputs\\preview_jpg"
  16. expMetaDir="D:\\Jeff\\modis\\outputs\\metadata"
  17. expMetaPath="D:\\Jeff\\modis\\outputs\\metadata.txt"
  18. mrtPath="C:\\Users\\uqjhans4\\Documents\\MRT\\bin"
  19. mshpPath="D:\\Jeff\\modis\\inputs\\MODIS_GRID_GEO.shp"
  20.  
  21. ## modis params
  22. dataType="MOD13Q1.005"
  23. jpgBrowseName="MOD13A1"
  24. bands="1,0,0,0,0,0,0,0,0,0,0,0" # gets NDVI
  25. esriPrjFile="C:\\Program Files (x86)\\ArcGIS\\Desktop10.0\\Coordinate Systems\\Projected Coordinate Systems\\World\\Mercator (world).prj"
  26.  
  27. ### load in dependencies
  28. require(RCurl)
  29. require(rgdal)
  30. require(rgeos)
  31. require(stringr)
  32. require(XML)
  33.  
  34. ### define built-in functions
  35. wgs84crs=CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
  36.  
  37. endswith=function(x, char) {
  38. currSub = substr(x, as.numeric(nchar(x)-nchar(char))+1,nchar(x))
  39. if (currSub==char) {return(TRUE)}
  40. return(FALSE)
  41. }
  42.  
  43. extractFolders=function(urlString) {
  44. htmlString=getURL(urlString)
  45. ret=gsub("]", "", str_replace_all(str_extract_all(htmlString, paste('DIR',".([^]]+).", '/\">',sep=""))[[1]], "[a-zA-Z\"= <>/]", ""))
  46. return(ret[which(nchar(ret)>0)])
  47. }
  48.  
  49. extractFiles=function(urlString, selHV) {
  50. # get filename strings
  51. htmlString=getURL(urlString)
  52. allVec=gsub('\">', '', gsub('<a href=\"', "", str_extract_all(htmlString, paste('<a href=\"',"([^]]+)", '\">',sep=""))[[1]]))
  53. ret=c()
  54. for (currSel in selHV) {
  55. ret=c(ret, grep(currSel, allVec, value=TRUE))
  56. }
  57. # select specific files
  58. jpg=sapply(ret, FUN=endswith, char=".jpg")
  59. xml=sapply(ret, FUN=endswith, char=".xml")
  60. hdf=sapply(ret, FUN=endswith, char=".hdf")
  61. retList=list(jpg=ret[which(jpg)], xml=ret[which(xml)], hdf=ret[which(hdf)])
  62. return(retList)
  63. }
  64.  
  65. generatePRM=function(inFile, outFile) {
  66. ### generates PRM file to resample to wgs 1984 CRS
  67. filename = paste(wDir, "\\mrt.prm", sep="")
  68. cat(paste('INPUT_FILENAME = ', inFile, "\n", sep=""), file=filename)
  69. cat(paste('SPECTRAL_SUBSET = (',bands,')\n',sep=""), file=filename, append=TRUE)
  70. cat(' \n', file=filename, append=TRUE)
  71. cat('SPATIAL_SUBSET_TYPE = INPUT_LAT_LONG\n', file=filename, append=TRUE)
  72. cat(' \n', file=filename, append=TRUE)
  73. cat('SPATIAL_SUBSET_UL_CORNER = ( -27.25 152.665 )\n', file=filename, append=TRUE)
  74. cat('SPATIAL_SUBSET_LR_CORNER = ( -27.66 153.200 )\n', file=filename, append=TRUE)
  75. cat(' \n', file=filename, append=TRUE)
  76. cat(paste('OUTPUT_FILENAME = ', outFile, "\n", sep=""), file=filename, append=TRUE)
  77. cat(' \n', file=filename, append=TRUE)
  78. cat('RESAMPLING_TYPE = NN\n', file=filename, append=TRUE)
  79. cat(' \n', file=filename, append=TRUE)
  80.  
  81. # mercator
  82. cat('OUTPUT_PROJECTION_TYPE = MERCAT\n', file=filename, append=TRUE)
  83. cat(' \n', file=filename, append=TRUE)
  84. cat('DATUM = WGS84\n', file=filename, append=TRUE)
  85. cat(' \n', file=filename, append=TRUE)
  86. cat('OUTPUT_PROJECTION_PARAMETERS = ( \n', file=filename, append=TRUE)
  87. cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
  88. cat(' 0.0 0.0 1.0\n', file=filename, append=TRUE)
  89. cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
  90. cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
  91. cat(' 0.0 0.0 0.0 )\n', file=filename, append=TRUE)
  92. }
  93.  
  94.  
  95. parseXML2DF=function(inFile) {
  96. # general formatting for xml
  97. xmlTree = xmlTreeParse(inFile)
  98. xmlParse = unlist(xmlTree, recursive=TRUE)
  99. xmlSub = xmlParse[which(sapply(names(xmlParse), FUN=endswith, char="value"))]
  100. names(xmlSub) = str_replace_all(names(xmlSub), c("doc"), "")
  101. names(xmlSub) = str_replace_all(names(xmlSub), c("children"), "")
  102. names(xmlSub) = str_replace_all(names(xmlSub), c("text"), "")
  103. names(xmlSub) = str_replace_all(names(xmlSub), c("value"), "")
  104. names(xmlSub) = gsub(".", "_", names(xmlSub), fixed=TRUE)
  105. names(xmlSub) = gsub("^_*|(?<=_)_|_*$", "", names(xmlSub), perl=TRUE)
  106. xmlDF=as.data.frame(matrix(xmlSub, nrow=1))
  107. colnames(xmlDF)=names(xmlSub)
  108. xmlDF=xmlDF[,-grep("GranuleURMetaData_InputGranule_InputPointer",names(xmlDF))]
  109. # properly parse the PSA fields
  110. PSANameFieldsPos= grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAName", names(xmlDF))
  111. PSANameFieldsValue = unlist(xmlDF[1,PSANameFieldsPos], recursive=TRUE)
  112. PSAValueFieldsPos=grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAValue", names(xmlDF))
  113. PSAValueFieldsValue = unlist(xmlDF[1,PSAValueFieldsPos], recursive=TRUE)
  114. xmlDF=xmlDF[,-c(PSANameFieldsPos, PSAValueFieldsPos)]
  115. for (currPos in seq_along(PSANameFieldsPos)) {
  116. xmlDF[PSANameFieldsValue[currPos]] = PSAValueFieldsValue[currPos]
  117. }
  118. # more general field name parsing
  119. names(xmlDF)=gsub("GranuleMetaDataFile_", "", names(xmlDF), fixed=TRUE)
  120. names(xmlDF)=gsub("GranuleURMetaData_", "", names(xmlDF), fixed=TRUE)
  121. names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
  122. names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
  123. names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_QAStats_", "", names(xmlDF), fixed=TRUE)
  124. names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_", "", names(xmlDF), fixed=TRUE)
  125. names(xmlDF)=gsub("QAFlags_", "", names(xmlDF), fixed=TRUE)
  126. names(xmlDF)=gsub(".", "_", names(xmlDF), fixed=TRUE)
  127. # properly parse the QAStats fields
  128. PSA_nfp=grep("ParameterName", names(xmlDF))
  129. PSA_nfv=unlist(xmlDF[,PSA_nfp])
  130. xmlDF=xmlDF[,-PSA_nfp]
  131. PSA_pfp1=grep("QAPercentMissingData",names(xmlDF))
  132. PSA_pfp2=grep("QAPercentOutofBoundsData",names(xmlDF))
  133. PSA_pfp3=grep("QAPercentInterpolatedData",names(xmlDF))
  134. PSA_pfp4=grep("QAPercentCloudCover",names(xmlDF))
  135. PSA_pfp5=grep("AutomaticQualityFlag",names(xmlDF))
  136. PSA_pfp6=grep("AutomaticQualityFlagExplanation",names(xmlDF))
  137. PSA_pfp7=grep("ScienceQualityFlag",names(xmlDF))
  138. PSA_pfp8=grep("ScienceQualityFlagExplanation",names(xmlDF))
  139. for (currPos in seq_along(PSA_nfp)) {
  140. currVal=gsub(" ", "_", PSA_nfv[currPos], fixed=TRUE)
  141. names(xmlDF)[PSA_pfp1[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp1[currPos]],sep="_")
  142. names(xmlDF)[PSA_pfp2[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp2[currPos]],sep="_")
  143. names(xmlDF)[PSA_pfp3[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp3[currPos]],sep="_")
  144. names(xmlDF)[PSA_pfp4[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp4[currPos]],sep="_")
  145. names(xmlDF)[PSA_pfp5[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp5[currPos]],sep="_")
  146. names(xmlDF)[PSA_pfp6[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp6[currPos]],sep="_")
  147. names(xmlDF)[PSA_pfp7[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp7[currPos]],sep="_")
  148. names(xmlDF)[PSA_pfp8[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp8[currPos]],sep="_")
  149. }
  150.  
  151. return(xmlDF)
  152. }
  153.  
  154. rbind_cust=function(x,y) {
  155. if (paste(names(x), collapse="_") == paste(names(y), collapse="_")) {return(rbind(x,y))}
  156. xUniCols=names(x)[which(!names(x) %in% names(y))]
  157. yUniCols=names(y)[which(!names(y) %in% names(x))]
  158. if (length(xUniCols)>0) {
  159. for (xi in xUniCols) {y[xi]=NA}
  160. }
  161. if (length(yUniCols)>0) {
  162. for (yi in yUniCols) {x[yi]=NA}
  163. }
  164. return(rbind(x,y))
  165. }
  166.  
  167. #### Preliminary processing
  168. cat("Prpearing data for processing..\n")
  169. ### load in data
  170. inpDF=read.table(inpPath, sep=",", header=TRUE, as.is=TRUE)
  171. mrtSHP=suppressWarnings(readOGR(dirname(mshpPath), gsub(".shp", "", basename(mshpPath), fixed=TRUE), verbose=FALSE))
  172.  
  173. ### get list of HV numbers
  174. mrtSHP@data$hv = paste("h", formatC(mrtSHP@data$h, width = 2, format = "d", flag = "0"), "v", formatC(mrtSHP@data$v, width = 2, format = "d", flag = "0"), sep="")
  175. inpSHP=SpatialPoints(coords=data.frame(inpDF$lon, inpDF$lat), proj4string=wgs84crs)
  176. selRows=gContains(mrtSHP, inpSHP, byid=TRUE)
  177. selHVs=unique(mrtSHP@data[which(selRows),"hv"])
  178.  
  179. ### get directories
  180. mainDir=paste("http://e4ftl01.cr.usgs.gov/MOLT/",dataType,"/",sep="")
  181. temporalDirs=paste(mainDir,extractFolders(mainDir),"/",sep="")
  182. dlDir=c(expJpgDir,expMetaDir,wDir)
  183.  
  184. #### Main processing
  185. ### download files
  186. xmlPaths=c()
  187. jpgPaths=c()
  188. for (currDirPos in seq_along(temporalDirs)[1:5]) {
  189.  
  190. cat(paste("Starting time increment ",currDirPos,"out of",length(temporalDirs),"\n"))
  191. # get files
  192. allFiles=extractFiles(temporalDirs[currDirPos], selHVs)
  193. inFiles=list(jpg=allFiles[[1]], xml=allFiles[[2]], hdf=allFiles[[3]])
  194. outFiles=list()
  195.  
  196. # download files
  197. cat("\tDownloading files..\n")
  198. for (currFileTypePos in seq_len(3)) {
  199. outFiles[[currFileTypePos]]=gsub(".", "_", basename(inFiles[[currFileTypePos]]), fixed=TRUE)
  200. outFiles[[currFileTypePos]]=paste(dlDir[[currFileTypePos]],"\\",gsub(paste("_", names(inFiles)[currFileTypePos], sep=""), paste(".", names(inFiles)[currFileTypePos], sep=""), outFiles[[currFileTypePos]], fixed=TRUE),sep="")
  201. download.file(url=paste(temporalDirs[currDirPos],inFiles[[currFileTypePos]],sep="")[1], destfile=outFiles[[currFileTypePos]][1], mode="wb", quiet=TRUE)
  202. }
  203. jpgPaths=c(jpgPaths,outFiles[[1]][1])
  204. xmlPaths=c(xmlPaths,outFiles[[2]])
  205.  
  206. ### reformat and project .hdf files
  207. cat("\tReformatting and projecting files..\n")
  208. pb=txtProgressBar(0, length(inFiles[[3]]), 0 ,style=3)
  209. for (currFilePos in seq_along(inFiles[[3]])) {
  210. # generate prm file
  211. inFile=outFiles[[3]][[currFilePos]]
  212. outFile=paste(expTifDir, "\\", gsub(".hdf", ".tif", basename(inFile)), sep="")
  213. prjFile=gsub(".tif", ".prj", outFile)
  214. generatePRM(inFile, outFile)
  215.  
  216. # execute modis resample tool
  217. system(paste(mrtPath,"\\resample.exe -p ",wDir,"\\mrt.prm",sep=""), show.output.on.console=FALSE)
  218.  
  219. # copy projection file
  220. file.copy (esriPrjFile, prjFile, overwrite=TRUE)
  221.  
  222. # update progress bar
  223. setTxtProgressBar(pb, currFilePos)
  224. }
  225. close(pb)
  226. }
  227.  
  228. ### merge all metadatafiles into a single csv
  229. cat("Generating metadata file..\n")
  230. mergedMDF=parseXML2DF(xmlPaths[1])
  231. pb2=txtProgressBar(0, length(xmlPaths), 1, style=3)
  232. counter=1
  233. for (currXml in xmlPaths[-1]) {
  234. # load xml file
  235. currDF=parseXML2DF(currXml)
  236.  
  237. # add parsed file to df
  238. mergedMDF=rbind_cust(mergedMDF, currDF)
  239.  
  240. # update progress bar
  241. counter=counter+1
  242. setTxtProgressBar(pb2, counter)
  243. }
  244. close(pb2)
  245.  
  246. # add in image preview links for use in excel
  247. mergedMDF$preview=paste('=HYPERLINK("',jpgPaths,'", "LINK")', sep="")
  248.  
  249. # save complete metadata file
  250. write.table(mergedMDF, expMetaPath, sep="\t", row.names=FALSE, quote=FALSE)
  251.  
  252. # print finishing message
  253. cat("DONE!\n")