Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ##### Initialization
- cat("Initializing..\n")
- rm(list=ls())
- test=function() {source("D:\\Jeff\\modis\\script\\downloadMODIS_V1B5.r")}
- ### change defaults
- options(download.file.method="auto")
- options(stringsAsFactors=FALSE)
- ### set user params
- # computer params
- inpPath="D:\\Jeff\\modis\\inputs\\brisbaneCoord.txt"
- wDir="D:\\Jeff\\modis\\temp"
- expTifDir="D:\\Jeff\\modis\\outputs\\Geo_TIFF"
- expJpgDir="D:\\Jeff\\modis\\outputs\\preview_jpg"
- expMetaDir="D:\\Jeff\\modis\\outputs\\metadata"
- expMetaPath="D:\\Jeff\\modis\\outputs\\metadata.txt"
- mrtPath="C:\\Users\\uqjhans4\\Documents\\MRT\\bin"
- mshpPath="D:\\Jeff\\modis\\inputs\\MODIS_GRID_GEO.shp"
- ## modis params
- dataType="MOD13Q1.005"
- jpgBrowseName="MOD13A1"
- bands="1,0,0,0,0,0,0,0,0,0,0,0" # gets NDVI
- esriPrjFile="C:\\Program Files (x86)\\ArcGIS\\Desktop10.0\\Coordinate Systems\\Projected Coordinate Systems\\World\\Mercator (world).prj"
- ### load in dependencies
- require(RCurl)
- require(rgdal)
- require(rgeos)
- require(stringr)
- require(XML)
- ### define built-in functions
- wgs84crs=CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
- endswith=function(x, char) {
- currSub = substr(x, as.numeric(nchar(x)-nchar(char))+1,nchar(x))
- if (currSub==char) {return(TRUE)}
- return(FALSE)
- }
- extractFolders=function(urlString) {
- htmlString=getURL(urlString)
- ret=gsub("]", "", str_replace_all(str_extract_all(htmlString, paste('DIR',".([^]]+).", '/\">',sep=""))[[1]], "[a-zA-Z\"= <>/]", ""))
- return(ret[which(nchar(ret)>0)])
- }
- extractFiles=function(urlString, selHV) {
- # get filename strings
- htmlString=getURL(urlString)
- allVec=gsub('\">', '', gsub('<a href=\"', "", str_extract_all(htmlString, paste('<a href=\"',"([^]]+)", '\">',sep=""))[[1]]))
- ret=c()
- for (currSel in selHV) {
- ret=c(ret, grep(currSel, allVec, value=TRUE))
- }
- # select specific files
- jpg=sapply(ret, FUN=endswith, char=".jpg")
- xml=sapply(ret, FUN=endswith, char=".xml")
- hdf=sapply(ret, FUN=endswith, char=".hdf")
- retList=list(jpg=ret[which(jpg)], xml=ret[which(xml)], hdf=ret[which(hdf)])
- return(retList)
- }
- generatePRM=function(inFile, outFile) {
- ### generates PRM file to resample to wgs 1984 CRS
- filename = paste(wDir, "\\mrt.prm", sep="")
- cat(paste('INPUT_FILENAME = ', inFile, "\n", sep=""), file=filename)
- cat(paste('SPECTRAL_SUBSET = (',bands,')\n',sep=""), file=filename, append=TRUE)
- cat(' \n', file=filename, append=TRUE)
- cat('SPATIAL_SUBSET_TYPE = INPUT_LAT_LONG\n', file=filename, append=TRUE)
- cat(' \n', file=filename, append=TRUE)
- cat('SPATIAL_SUBSET_UL_CORNER = ( -27.25 152.665 )\n', file=filename, append=TRUE)
- cat('SPATIAL_SUBSET_LR_CORNER = ( -27.66 153.200 )\n', file=filename, append=TRUE)
- cat(' \n', file=filename, append=TRUE)
- cat(paste('OUTPUT_FILENAME = ', outFile, "\n", sep=""), file=filename, append=TRUE)
- cat(' \n', file=filename, append=TRUE)
- cat('RESAMPLING_TYPE = NN\n', file=filename, append=TRUE)
- cat(' \n', file=filename, append=TRUE)
- # mercator
- cat('OUTPUT_PROJECTION_TYPE = MERCAT\n', file=filename, append=TRUE)
- cat(' \n', file=filename, append=TRUE)
- cat('DATUM = WGS84\n', file=filename, append=TRUE)
- cat(' \n', file=filename, append=TRUE)
- cat('OUTPUT_PROJECTION_PARAMETERS = ( \n', file=filename, append=TRUE)
- cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
- cat(' 0.0 0.0 1.0\n', file=filename, append=TRUE)
- cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
- cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
- cat(' 0.0 0.0 0.0 )\n', file=filename, append=TRUE)
- }
- parseXML2DF=function(inFile) {
- # general formatting for xml
- xmlTree = xmlTreeParse(inFile)
- xmlParse = unlist(xmlTree, recursive=TRUE)
- xmlSub = xmlParse[which(sapply(names(xmlParse), FUN=endswith, char="value"))]
- names(xmlSub) = str_replace_all(names(xmlSub), c("doc"), "")
- names(xmlSub) = str_replace_all(names(xmlSub), c("children"), "")
- names(xmlSub) = str_replace_all(names(xmlSub), c("text"), "")
- names(xmlSub) = str_replace_all(names(xmlSub), c("value"), "")
- names(xmlSub) = gsub(".", "_", names(xmlSub), fixed=TRUE)
- names(xmlSub) = gsub("^_*|(?<=_)_|_*$", "", names(xmlSub), perl=TRUE)
- xmlDF=as.data.frame(matrix(xmlSub, nrow=1))
- colnames(xmlDF)=names(xmlSub)
- xmlDF=xmlDF[,-grep("GranuleURMetaData_InputGranule_InputPointer",names(xmlDF))]
- # properly parse the PSA fields
- PSANameFieldsPos= grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAName", names(xmlDF))
- PSANameFieldsValue = unlist(xmlDF[1,PSANameFieldsPos], recursive=TRUE)
- PSAValueFieldsPos=grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAValue", names(xmlDF))
- PSAValueFieldsValue = unlist(xmlDF[1,PSAValueFieldsPos], recursive=TRUE)
- xmlDF=xmlDF[,-c(PSANameFieldsPos, PSAValueFieldsPos)]
- for (currPos in seq_along(PSANameFieldsPos)) {
- xmlDF[PSANameFieldsValue[currPos]] = PSAValueFieldsValue[currPos]
- }
- # more general field name parsing
- names(xmlDF)=gsub("GranuleMetaDataFile_", "", names(xmlDF), fixed=TRUE)
- names(xmlDF)=gsub("GranuleURMetaData_", "", names(xmlDF), fixed=TRUE)
- names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
- names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
- names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_QAStats_", "", names(xmlDF), fixed=TRUE)
- names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_", "", names(xmlDF), fixed=TRUE)
- names(xmlDF)=gsub("QAFlags_", "", names(xmlDF), fixed=TRUE)
- names(xmlDF)=gsub(".", "_", names(xmlDF), fixed=TRUE)
- # properly parse the QAStats fields
- PSA_nfp=grep("ParameterName", names(xmlDF))
- PSA_nfv=unlist(xmlDF[,PSA_nfp])
- xmlDF=xmlDF[,-PSA_nfp]
- PSA_pfp1=grep("QAPercentMissingData",names(xmlDF))
- PSA_pfp2=grep("QAPercentOutofBoundsData",names(xmlDF))
- PSA_pfp3=grep("QAPercentInterpolatedData",names(xmlDF))
- PSA_pfp4=grep("QAPercentCloudCover",names(xmlDF))
- PSA_pfp5=grep("AutomaticQualityFlag",names(xmlDF))
- PSA_pfp6=grep("AutomaticQualityFlagExplanation",names(xmlDF))
- PSA_pfp7=grep("ScienceQualityFlag",names(xmlDF))
- PSA_pfp8=grep("ScienceQualityFlagExplanation",names(xmlDF))
- for (currPos in seq_along(PSA_nfp)) {
- currVal=gsub(" ", "_", PSA_nfv[currPos], fixed=TRUE)
- names(xmlDF)[PSA_pfp1[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp1[currPos]],sep="_")
- names(xmlDF)[PSA_pfp2[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp2[currPos]],sep="_")
- names(xmlDF)[PSA_pfp3[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp3[currPos]],sep="_")
- names(xmlDF)[PSA_pfp4[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp4[currPos]],sep="_")
- names(xmlDF)[PSA_pfp5[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp5[currPos]],sep="_")
- names(xmlDF)[PSA_pfp6[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp6[currPos]],sep="_")
- names(xmlDF)[PSA_pfp7[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp7[currPos]],sep="_")
- names(xmlDF)[PSA_pfp8[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp8[currPos]],sep="_")
- }
- return(xmlDF)
- }
- rbind_cust=function(x,y) {
- if (paste(names(x), collapse="_") == paste(names(y), collapse="_")) {return(rbind(x,y))}
- xUniCols=names(x)[which(!names(x) %in% names(y))]
- yUniCols=names(y)[which(!names(y) %in% names(x))]
- if (length(xUniCols)>0) {
- for (xi in xUniCols) {y[xi]=NA}
- }
- if (length(yUniCols)>0) {
- for (yi in yUniCols) {x[yi]=NA}
- }
- return(rbind(x,y))
- }
- #### Preliminary processing
- cat("Prpearing data for processing..\n")
- ### load in data
- inpDF=read.table(inpPath, sep=",", header=TRUE, as.is=TRUE)
- mrtSHP=suppressWarnings(readOGR(dirname(mshpPath), gsub(".shp", "", basename(mshpPath), fixed=TRUE), verbose=FALSE))
- ### get list of HV numbers
- mrtSHP@data$hv = paste("h", formatC(mrtSHP@data$h, width = 2, format = "d", flag = "0"), "v", formatC(mrtSHP@data$v, width = 2, format = "d", flag = "0"), sep="")
- inpSHP=SpatialPoints(coords=data.frame(inpDF$lon, inpDF$lat), proj4string=wgs84crs)
- selRows=gContains(mrtSHP, inpSHP, byid=TRUE)
- selHVs=unique(mrtSHP@data[which(selRows),"hv"])
- ### get directories
- mainDir=paste("http://e4ftl01.cr.usgs.gov/MOLT/",dataType,"/",sep="")
- temporalDirs=paste(mainDir,extractFolders(mainDir),"/",sep="")
- dlDir=c(expJpgDir,expMetaDir,wDir)
- #### Main processing
- ### download files
- xmlPaths=c()
- jpgPaths=c()
- for (currDirPos in seq_along(temporalDirs)[1:5]) {
- cat(paste("Starting time increment ",currDirPos,"out of",length(temporalDirs),"\n"))
- # get files
- allFiles=extractFiles(temporalDirs[currDirPos], selHVs)
- inFiles=list(jpg=allFiles[[1]], xml=allFiles[[2]], hdf=allFiles[[3]])
- outFiles=list()
- # download files
- cat("\tDownloading files..\n")
- for (currFileTypePos in seq_len(3)) {
- outFiles[[currFileTypePos]]=gsub(".", "_", basename(inFiles[[currFileTypePos]]), fixed=TRUE)
- outFiles[[currFileTypePos]]=paste(dlDir[[currFileTypePos]],"\\",gsub(paste("_", names(inFiles)[currFileTypePos], sep=""), paste(".", names(inFiles)[currFileTypePos], sep=""), outFiles[[currFileTypePos]], fixed=TRUE),sep="")
- download.file(url=paste(temporalDirs[currDirPos],inFiles[[currFileTypePos]],sep="")[1], destfile=outFiles[[currFileTypePos]][1], mode="wb", quiet=TRUE)
- }
- jpgPaths=c(jpgPaths,outFiles[[1]][1])
- xmlPaths=c(xmlPaths,outFiles[[2]])
- ### reformat and project .hdf files
- cat("\tReformatting and projecting files..\n")
- pb=txtProgressBar(0, length(inFiles[[3]]), 0 ,style=3)
- for (currFilePos in seq_along(inFiles[[3]])) {
- # generate prm file
- inFile=outFiles[[3]][[currFilePos]]
- outFile=paste(expTifDir, "\\", gsub(".hdf", ".tif", basename(inFile)), sep="")
- prjFile=gsub(".tif", ".prj", outFile)
- generatePRM(inFile, outFile)
- # execute modis resample tool
- system(paste(mrtPath,"\\resample.exe -p ",wDir,"\\mrt.prm",sep=""), show.output.on.console=FALSE)
- # copy projection file
- file.copy (esriPrjFile, prjFile, overwrite=TRUE)
- # update progress bar
- setTxtProgressBar(pb, currFilePos)
- }
- close(pb)
- }
- ### merge all metadatafiles into a single csv
- cat("Generating metadata file..\n")
- mergedMDF=parseXML2DF(xmlPaths[1])
- pb2=txtProgressBar(0, length(xmlPaths), 1, style=3)
- counter=1
- for (currXml in xmlPaths[-1]) {
- # load xml file
- currDF=parseXML2DF(currXml)
- # add parsed file to df
- mergedMDF=rbind_cust(mergedMDF, currDF)
- # update progress bar
- counter=counter+1
- setTxtProgressBar(pb2, counter)
- }
- close(pb2)
- # add in image preview links for use in excel
- mergedMDF$preview=paste('=HYPERLINK("',jpgPaths,'", "LINK")', sep="")
- # save complete metadata file
- write.table(mergedMDF, expMetaPath, sep="\t", row.names=FALSE, quote=FALSE)
- # print finishing message
- cat("DONE!\n")
Add Comment
Please, Sign In to add comment