Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

R code to download MODIS via HTTP

By: a guest on Aug 3rd, 2013  |  syntax: None  |  size: 10.58 KB  |  views: 93  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. ##### Initialization
  2. cat("Initializing..\n")
  3. rm(list=ls())
  4. test=function() {source("D:\\Jeff\\modis\\script\\downloadMODIS_V1B5.r")}
  5.  
  6. ### change defaults
  7. options(download.file.method="auto")
  8. options(stringsAsFactors=FALSE)
  9.  
  10. ### set user params
  11. # computer params
  12. inpPath="D:\\Jeff\\modis\\inputs\\brisbaneCoord.txt"
  13. wDir="D:\\Jeff\\modis\\temp"
  14. expTifDir="D:\\Jeff\\modis\\outputs\\Geo_TIFF"
  15. expJpgDir="D:\\Jeff\\modis\\outputs\\preview_jpg"
  16. expMetaDir="D:\\Jeff\\modis\\outputs\\metadata"
  17. expMetaPath="D:\\Jeff\\modis\\outputs\\metadata.txt"
  18. mrtPath="C:\\Users\\uqjhans4\\Documents\\MRT\\bin"
  19. mshpPath="D:\\Jeff\\modis\\inputs\\MODIS_GRID_GEO.shp"
  20.  
  21. ## modis params
  22. dataType="MOD13Q1.005"
  23. jpgBrowseName="MOD13A1"
  24. bands="1,0,0,0,0,0,0,0,0,0,0,0" # gets NDVI
  25. esriPrjFile="C:\\Program Files (x86)\\ArcGIS\\Desktop10.0\\Coordinate Systems\\Projected Coordinate Systems\\World\\Mercator (world).prj"
  26.  
  27. ### load in dependencies
  28. require(RCurl)
  29. require(rgdal)
  30. require(rgeos)
  31. require(stringr)
  32. require(XML)
  33.  
  34. ### define built-in functions
  35. wgs84crs=CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
  36.  
  37. endswith=function(x, char) {
  38.     currSub = substr(x, as.numeric(nchar(x)-nchar(char))+1,nchar(x))
  39.     if (currSub==char) {return(TRUE)}
  40.     return(FALSE)
  41. }
  42.  
  43. extractFolders=function(urlString) {
  44.         htmlString=getURL(urlString)
  45.         ret=gsub("]", "", str_replace_all(str_extract_all(htmlString, paste('DIR',".([^]]+).", '/\">',sep=""))[[1]], "[a-zA-Z\"= <>/]", ""))
  46.         return(ret[which(nchar(ret)>0)])
  47. }
  48.  
  49. extractFiles=function(urlString, selHV) {
  50.         # get filename strings
  51.         htmlString=getURL(urlString)
  52.         allVec=gsub('\">', '', gsub('<a href=\"', "", str_extract_all(htmlString, paste('<a href=\"',"([^]]+)", '\">',sep=""))[[1]]))
  53.         ret=c()
  54.         for (currSel in selHV) {
  55.                 ret=c(ret, grep(currSel, allVec, value=TRUE))
  56.         }
  57.         # select specific files
  58.         jpg=sapply(ret, FUN=endswith, char=".jpg")
  59.         xml=sapply(ret, FUN=endswith, char=".xml")
  60.         hdf=sapply(ret, FUN=endswith, char=".hdf")
  61.         retList=list(jpg=ret[which(jpg)], xml=ret[which(xml)], hdf=ret[which(hdf)])
  62.         return(retList)
  63. }
  64.  
  65. generatePRM=function(inFile, outFile) {
  66.         ### generates PRM file to resample to wgs 1984 CRS
  67.         filename = paste(wDir, "\\mrt.prm", sep="")
  68.         cat(paste('INPUT_FILENAME = ', inFile, "\n", sep=""), file=filename)
  69.         cat(paste('SPECTRAL_SUBSET = (',bands,')\n',sep=""), file=filename, append=TRUE)
  70.         cat('  \n', file=filename, append=TRUE)
  71.         cat('SPATIAL_SUBSET_TYPE = INPUT_LAT_LONG\n', file=filename, append=TRUE)
  72.         cat('  \n', file=filename, append=TRUE)
  73.         cat('SPATIAL_SUBSET_UL_CORNER = ( -27.25 152.665 )\n', file=filename, append=TRUE)
  74.         cat('SPATIAL_SUBSET_LR_CORNER = ( -27.66 153.200 )\n', file=filename, append=TRUE)
  75.         cat('  \n', file=filename, append=TRUE)
  76.         cat(paste('OUTPUT_FILENAME = ', outFile, "\n", sep=""), file=filename, append=TRUE)
  77.         cat('  \n', file=filename, append=TRUE)
  78.         cat('RESAMPLING_TYPE = NN\n', file=filename, append=TRUE)
  79.         cat('  \n', file=filename, append=TRUE)
  80.        
  81.         # mercator
  82.         cat('OUTPUT_PROJECTION_TYPE = MERCAT\n', file=filename, append=TRUE)
  83.         cat('  \n', file=filename, append=TRUE)
  84.         cat('DATUM = WGS84\n', file=filename, append=TRUE)
  85.         cat('  \n', file=filename, append=TRUE)
  86.         cat('OUTPUT_PROJECTION_PARAMETERS = ( \n', file=filename, append=TRUE)
  87.         cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
  88.         cat(' 0.0 0.0 1.0\n', file=filename, append=TRUE)
  89.         cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
  90.         cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
  91.         cat(' 0.0 0.0 0.0 )\n', file=filename, append=TRUE)            
  92. }
  93.  
  94.  
  95. parseXML2DF=function(inFile) {
  96.         # general formatting for xml
  97.         xmlTree = xmlTreeParse(inFile)
  98.         xmlParse = unlist(xmlTree, recursive=TRUE)
  99.         xmlSub = xmlParse[which(sapply(names(xmlParse), FUN=endswith, char="value"))]
  100.         names(xmlSub) = str_replace_all(names(xmlSub), c("doc"), "")
  101.         names(xmlSub) = str_replace_all(names(xmlSub), c("children"), "")
  102.         names(xmlSub) = str_replace_all(names(xmlSub), c("text"), "")
  103.         names(xmlSub) = str_replace_all(names(xmlSub), c("value"), "")
  104.         names(xmlSub) = gsub(".", "_", names(xmlSub), fixed=TRUE)
  105.         names(xmlSub) = gsub("^_*|(?<=_)_|_*$", "", names(xmlSub), perl=TRUE)
  106.         xmlDF=as.data.frame(matrix(xmlSub, nrow=1))
  107.         colnames(xmlDF)=names(xmlSub)
  108.         xmlDF=xmlDF[,-grep("GranuleURMetaData_InputGranule_InputPointer",names(xmlDF))]
  109.         # properly parse the PSA fields
  110.         PSANameFieldsPos= grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAName", names(xmlDF))
  111.         PSANameFieldsValue = unlist(xmlDF[1,PSANameFieldsPos], recursive=TRUE)
  112.         PSAValueFieldsPos=grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAValue", names(xmlDF))
  113.         PSAValueFieldsValue = unlist(xmlDF[1,PSAValueFieldsPos], recursive=TRUE)               
  114.         xmlDF=xmlDF[,-c(PSANameFieldsPos, PSAValueFieldsPos)]
  115.         for (currPos in seq_along(PSANameFieldsPos)) {
  116.                 xmlDF[PSANameFieldsValue[currPos]] = PSAValueFieldsValue[currPos]
  117.         }
  118.         # more general field name parsing
  119.         names(xmlDF)=gsub("GranuleMetaDataFile_", "", names(xmlDF), fixed=TRUE)
  120.         names(xmlDF)=gsub("GranuleURMetaData_", "", names(xmlDF), fixed=TRUE)
  121.         names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
  122.         names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
  123.         names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_QAStats_", "", names(xmlDF), fixed=TRUE)
  124.         names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_", "", names(xmlDF), fixed=TRUE)
  125.         names(xmlDF)=gsub("QAFlags_", "", names(xmlDF), fixed=TRUE)
  126.         names(xmlDF)=gsub(".", "_", names(xmlDF), fixed=TRUE)
  127.         # properly parse the QAStats fields
  128.         PSA_nfp=grep("ParameterName", names(xmlDF))
  129.         PSA_nfv=unlist(xmlDF[,PSA_nfp])
  130.         xmlDF=xmlDF[,-PSA_nfp]
  131.         PSA_pfp1=grep("QAPercentMissingData",names(xmlDF))
  132.         PSA_pfp2=grep("QAPercentOutofBoundsData",names(xmlDF))
  133.         PSA_pfp3=grep("QAPercentInterpolatedData",names(xmlDF))
  134.         PSA_pfp4=grep("QAPercentCloudCover",names(xmlDF))
  135.         PSA_pfp5=grep("AutomaticQualityFlag",names(xmlDF))
  136.         PSA_pfp6=grep("AutomaticQualityFlagExplanation",names(xmlDF))
  137.         PSA_pfp7=grep("ScienceQualityFlag",names(xmlDF))
  138.         PSA_pfp8=grep("ScienceQualityFlagExplanation",names(xmlDF))
  139.         for (currPos in seq_along(PSA_nfp)) {
  140.                 currVal=gsub(" ", "_", PSA_nfv[currPos], fixed=TRUE)
  141.                 names(xmlDF)[PSA_pfp1[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp1[currPos]],sep="_")
  142.                 names(xmlDF)[PSA_pfp2[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp2[currPos]],sep="_")
  143.                 names(xmlDF)[PSA_pfp3[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp3[currPos]],sep="_")
  144.                 names(xmlDF)[PSA_pfp4[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp4[currPos]],sep="_")
  145.                 names(xmlDF)[PSA_pfp5[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp5[currPos]],sep="_")
  146.                 names(xmlDF)[PSA_pfp6[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp6[currPos]],sep="_")
  147.                 names(xmlDF)[PSA_pfp7[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp7[currPos]],sep="_")
  148.                 names(xmlDF)[PSA_pfp8[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp8[currPos]],sep="_")
  149.         }
  150.        
  151.         return(xmlDF)
  152. }
  153.  
  154. rbind_cust=function(x,y) {
  155.         if (paste(names(x), collapse="_") == paste(names(y), collapse="_")) {return(rbind(x,y))}
  156.         xUniCols=names(x)[which(!names(x) %in% names(y))]
  157.         yUniCols=names(y)[which(!names(y) %in% names(x))]
  158.         if (length(xUniCols)>0) {
  159.                 for (xi in xUniCols) {y[xi]=NA}
  160.         }
  161.         if (length(yUniCols)>0) {
  162.                 for (yi in yUniCols) {x[yi]=NA}
  163.         }
  164.         return(rbind(x,y))
  165. }
  166.  
  167. #### Preliminary processing
  168. cat("Prpearing data for processing..\n")
  169. ### load in data
  170. inpDF=read.table(inpPath, sep=",", header=TRUE, as.is=TRUE)
  171. mrtSHP=suppressWarnings(readOGR(dirname(mshpPath), gsub(".shp", "", basename(mshpPath), fixed=TRUE), verbose=FALSE))
  172.  
  173. ### get list of HV numbers
  174. mrtSHP@data$hv = paste("h", formatC(mrtSHP@data$h, width = 2, format = "d", flag = "0"), "v", formatC(mrtSHP@data$v, width = 2, format = "d", flag = "0"), sep="")
  175. inpSHP=SpatialPoints(coords=data.frame(inpDF$lon, inpDF$lat), proj4string=wgs84crs)
  176. selRows=gContains(mrtSHP, inpSHP, byid=TRUE)
  177. selHVs=unique(mrtSHP@data[which(selRows),"hv"])
  178.  
  179. ### get directories
  180. mainDir=paste("http://e4ftl01.cr.usgs.gov/MOLT/",dataType,"/",sep="")
  181. temporalDirs=paste(mainDir,extractFolders(mainDir),"/",sep="")
  182. dlDir=c(expJpgDir,expMetaDir,wDir)
  183.  
  184. #### Main processing
  185. ### download files
  186. xmlPaths=c()
  187. jpgPaths=c()
  188. for (currDirPos in seq_along(temporalDirs)[1:5]) {
  189.        
  190.         cat(paste("Starting time increment ",currDirPos,"out of",length(temporalDirs),"\n"))
  191.         # get files
  192.         allFiles=extractFiles(temporalDirs[currDirPos], selHVs)
  193.         inFiles=list(jpg=allFiles[[1]], xml=allFiles[[2]], hdf=allFiles[[3]])
  194.         outFiles=list()
  195.        
  196.         # download files
  197.         cat("\tDownloading files..\n")
  198.         for (currFileTypePos in seq_len(3)) {
  199.                 outFiles[[currFileTypePos]]=gsub(".", "_", basename(inFiles[[currFileTypePos]]), fixed=TRUE)
  200.                 outFiles[[currFileTypePos]]=paste(dlDir[[currFileTypePos]],"\\",gsub(paste("_", names(inFiles)[currFileTypePos], sep=""), paste(".", names(inFiles)[currFileTypePos], sep=""), outFiles[[currFileTypePos]], fixed=TRUE),sep="")
  201.                 download.file(url=paste(temporalDirs[currDirPos],inFiles[[currFileTypePos]],sep="")[1], destfile=outFiles[[currFileTypePos]][1], mode="wb", quiet=TRUE)
  202.         }
  203.         jpgPaths=c(jpgPaths,outFiles[[1]][1])
  204.         xmlPaths=c(xmlPaths,outFiles[[2]])
  205.  
  206.         ### reformat and project .hdf files
  207.         cat("\tReformatting and projecting files..\n")
  208.         pb=txtProgressBar(0, length(inFiles[[3]]), 0 ,style=3)
  209.         for (currFilePos in seq_along(inFiles[[3]])) {
  210.                 # generate prm file
  211.                 inFile=outFiles[[3]][[currFilePos]]
  212.                 outFile=paste(expTifDir, "\\", gsub(".hdf", ".tif", basename(inFile)), sep="")
  213.                 prjFile=gsub(".tif", ".prj", outFile)
  214.                 generatePRM(inFile, outFile)
  215.        
  216.                 # execute modis resample tool
  217.                 system(paste(mrtPath,"\\resample.exe -p ",wDir,"\\mrt.prm",sep=""), show.output.on.console=FALSE)
  218.                
  219.                 # copy projection file
  220.                 file.copy (esriPrjFile, prjFile, overwrite=TRUE)
  221.                
  222.                 # update progress bar
  223.                 setTxtProgressBar(pb, currFilePos)
  224.         }
  225.         close(pb)
  226. }
  227.  
  228. ### merge all metadatafiles into a single csv
  229. cat("Generating metadata file..\n")
  230. mergedMDF=parseXML2DF(xmlPaths[1])
  231. pb2=txtProgressBar(0, length(xmlPaths), 1, style=3)
  232. counter=1
  233. for (currXml in xmlPaths[-1]) {
  234.         # load xml file
  235.         currDF=parseXML2DF(currXml)
  236.        
  237.         # add parsed file to df
  238.         mergedMDF=rbind_cust(mergedMDF, currDF)
  239.        
  240.         # update progress bar
  241.         counter=counter+1
  242.         setTxtProgressBar(pb2, counter)
  243. }
  244. close(pb2)
  245.  
  246. # add in image preview links for use in excel
  247. mergedMDF$preview=paste('=HYPERLINK("',jpgPaths,'", "LINK")', sep="")
  248.  
  249. # save complete metadata file
  250. write.table(mergedMDF, expMetaPath, sep="\t", row.names=FALSE, quote=FALSE)
  251.  
  252. # print finishing message
  253. cat("DONE!\n")
clone this paste RAW Paste Data