##### Initialization
cat("Initializing..\n")
rm(list=ls())
test=function() {source("D:\\Jeff\\modis\\script\\downloadMODIS_V1B5.r")}
### change defaults
options(download.file.method="auto")
options(stringsAsFactors=FALSE)
### set user params
# computer params
inpPath="D:\\Jeff\\modis\\inputs\\brisbaneCoord.txt"
wDir="D:\\Jeff\\modis\\temp"
expTifDir="D:\\Jeff\\modis\\outputs\\Geo_TIFF"
expJpgDir="D:\\Jeff\\modis\\outputs\\preview_jpg"
expMetaDir="D:\\Jeff\\modis\\outputs\\metadata"
expMetaPath="D:\\Jeff\\modis\\outputs\\metadata.txt"
mrtPath="C:\\Users\\uqjhans4\\Documents\\MRT\\bin"
mshpPath="D:\\Jeff\\modis\\inputs\\MODIS_GRID_GEO.shp"
## modis params
dataType="MOD13Q1.005"
jpgBrowseName="MOD13A1"
bands="1,0,0,0,0,0,0,0,0,0,0,0" # gets NDVI
esriPrjFile="C:\\Program Files (x86)\\ArcGIS\\Desktop10.0\\Coordinate Systems\\Projected Coordinate Systems\\World\\Mercator (world).prj"
### load in dependencies
require(RCurl)
require(rgdal)
require(rgeos)
require(stringr)
require(XML)
### define built-in functions
wgs84crs=CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
endswith=function(x, char) {
currSub = substr(x, as.numeric(nchar(x)-nchar(char))+1,nchar(x))
if (currSub==char) {return(TRUE)}
return(FALSE)
}
extractFolders=function(urlString) {
htmlString=getURL(urlString)
ret=gsub("]", "", str_replace_all(str_extract_all(htmlString, paste('DIR',".([^]]+).", '/\">',sep=""))[[1]], "[a-zA-Z\"= <>/]", ""))
return(ret[which(nchar(ret)>0)])
}
extractFiles=function(urlString, selHV) {
# get filename strings
htmlString=getURL(urlString)
allVec=gsub('\">', '', gsub('<a href=\"', "", str_extract_all(htmlString, paste('<a href=\"',"([^]]+)", '\">',sep=""))[[1]]))
ret=c()
for (currSel in selHV) {
ret=c(ret, grep(currSel, allVec, value=TRUE))
}
# select specific files
jpg=sapply(ret, FUN=endswith, char=".jpg")
xml=sapply(ret, FUN=endswith, char=".xml")
hdf=sapply(ret, FUN=endswith, char=".hdf")
retList=list(jpg=ret[which(jpg)], xml=ret[which(xml)], hdf=ret[which(hdf)])
return(retList)
}
generatePRM=function(inFile, outFile) {
### generates PRM file to resample to wgs 1984 CRS
filename = paste(wDir, "\\mrt.prm", sep="")
cat(paste('INPUT_FILENAME = ', inFile, "\n", sep=""), file=filename)
cat(paste('SPECTRAL_SUBSET = (',bands,')\n',sep=""), file=filename, append=TRUE)
cat(' \n', file=filename, append=TRUE)
cat('SPATIAL_SUBSET_TYPE = INPUT_LAT_LONG\n', file=filename, append=TRUE)
cat(' \n', file=filename, append=TRUE)
cat('SPATIAL_SUBSET_UL_CORNER = ( -27.25 152.665 )\n', file=filename, append=TRUE)
cat('SPATIAL_SUBSET_LR_CORNER = ( -27.66 153.200 )\n', file=filename, append=TRUE)
cat(' \n', file=filename, append=TRUE)
cat(paste('OUTPUT_FILENAME = ', outFile, "\n", sep=""), file=filename, append=TRUE)
cat(' \n', file=filename, append=TRUE)
cat('RESAMPLING_TYPE = NN\n', file=filename, append=TRUE)
cat(' \n', file=filename, append=TRUE)
# mercator
cat('OUTPUT_PROJECTION_TYPE = MERCAT\n', file=filename, append=TRUE)
cat(' \n', file=filename, append=TRUE)
cat('DATUM = WGS84\n', file=filename, append=TRUE)
cat(' \n', file=filename, append=TRUE)
cat('OUTPUT_PROJECTION_PARAMETERS = ( \n', file=filename, append=TRUE)
cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
cat(' 0.0 0.0 1.0\n', file=filename, append=TRUE)
cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
cat(' 0.0 0.0 0.0\n', file=filename, append=TRUE)
cat(' 0.0 0.0 0.0 )\n', file=filename, append=TRUE)
}
parseXML2DF=function(inFile) {
# general formatting for xml
xmlTree = xmlTreeParse(inFile)
xmlParse = unlist(xmlTree, recursive=TRUE)
xmlSub = xmlParse[which(sapply(names(xmlParse), FUN=endswith, char="value"))]
names(xmlSub) = str_replace_all(names(xmlSub), c("doc"), "")
names(xmlSub) = str_replace_all(names(xmlSub), c("children"), "")
names(xmlSub) = str_replace_all(names(xmlSub), c("text"), "")
names(xmlSub) = str_replace_all(names(xmlSub), c("value"), "")
names(xmlSub) = gsub(".", "_", names(xmlSub), fixed=TRUE)
names(xmlSub) = gsub("^_*|(?<=_)_|_*$", "", names(xmlSub), perl=TRUE)
xmlDF=as.data.frame(matrix(xmlSub, nrow=1))
colnames(xmlDF)=names(xmlSub)
xmlDF=xmlDF[,-grep("GranuleURMetaData_InputGranule_InputPointer",names(xmlDF))]
# properly parse the PSA fields
PSANameFieldsPos= grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAName", names(xmlDF))
PSANameFieldsValue = unlist(xmlDF[1,PSANameFieldsPos], recursive=TRUE)
PSAValueFieldsPos=grep("GranuleMetaDataFile_GranuleURMetaData_PSAs_PSA_PSAValue", names(xmlDF))
PSAValueFieldsValue = unlist(xmlDF[1,PSAValueFieldsPos], recursive=TRUE)
xmlDF=xmlDF[,-c(PSANameFieldsPos, PSAValueFieldsPos)]
for (currPos in seq_along(PSANameFieldsPos)) {
xmlDF[PSANameFieldsValue[currPos]] = PSAValueFieldsValue[currPos]
}
# more general field name parsing
names(xmlDF)=gsub("GranuleMetaDataFile_", "", names(xmlDF), fixed=TRUE)
names(xmlDF)=gsub("GranuleURMetaData_", "", names(xmlDF), fixed=TRUE)
names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
names(xmlDF)=gsub("SpatialDomainContainer_HorizontalSpatialDomainContainer_GPolygon_", "", names(xmlDF), fixed=TRUE)
names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_QAStats_", "", names(xmlDF), fixed=TRUE)
names(xmlDF)=gsub("MeasuredParameter_MeasuredParameterContainer_", "", names(xmlDF), fixed=TRUE)
names(xmlDF)=gsub("QAFlags_", "", names(xmlDF), fixed=TRUE)
names(xmlDF)=gsub(".", "_", names(xmlDF), fixed=TRUE)
# properly parse the QAStats fields
PSA_nfp=grep("ParameterName", names(xmlDF))
PSA_nfv=unlist(xmlDF[,PSA_nfp])
xmlDF=xmlDF[,-PSA_nfp]
PSA_pfp1=grep("QAPercentMissingData",names(xmlDF))
PSA_pfp2=grep("QAPercentOutofBoundsData",names(xmlDF))
PSA_pfp3=grep("QAPercentInterpolatedData",names(xmlDF))
PSA_pfp4=grep("QAPercentCloudCover",names(xmlDF))
PSA_pfp5=grep("AutomaticQualityFlag",names(xmlDF))
PSA_pfp6=grep("AutomaticQualityFlagExplanation",names(xmlDF))
PSA_pfp7=grep("ScienceQualityFlag",names(xmlDF))
PSA_pfp8=grep("ScienceQualityFlagExplanation",names(xmlDF))
for (currPos in seq_along(PSA_nfp)) {
currVal=gsub(" ", "_", PSA_nfv[currPos], fixed=TRUE)
names(xmlDF)[PSA_pfp1[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp1[currPos]],sep="_")
names(xmlDF)[PSA_pfp2[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp2[currPos]],sep="_")
names(xmlDF)[PSA_pfp3[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp3[currPos]],sep="_")
names(xmlDF)[PSA_pfp4[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp4[currPos]],sep="_")
names(xmlDF)[PSA_pfp5[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp5[currPos]],sep="_")
names(xmlDF)[PSA_pfp6[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp6[currPos]],sep="_")
names(xmlDF)[PSA_pfp7[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp7[currPos]],sep="_")
names(xmlDF)[PSA_pfp8[currPos]] = paste("B",currVal,names(xmlDF)[PSA_pfp8[currPos]],sep="_")
}
return(xmlDF)
}
rbind_cust=function(x,y) {
if (paste(names(x), collapse="_") == paste(names(y), collapse="_")) {return(rbind(x,y))}
xUniCols=names(x)[which(!names(x) %in% names(y))]
yUniCols=names(y)[which(!names(y) %in% names(x))]
if (length(xUniCols)>0) {
for (xi in xUniCols) {y[xi]=NA}
}
if (length(yUniCols)>0) {
for (yi in yUniCols) {x[yi]=NA}
}
return(rbind(x,y))
}
#### Preliminary processing
cat("Prpearing data for processing..\n")
### load in data
inpDF=read.table(inpPath, sep=",", header=TRUE, as.is=TRUE)
mrtSHP=suppressWarnings(readOGR(dirname(mshpPath), gsub(".shp", "", basename(mshpPath), fixed=TRUE), verbose=FALSE))
### get list of HV numbers
mrtSHP@data$hv = paste("h", formatC(mrtSHP@data$h, width = 2, format = "d", flag = "0"), "v", formatC(mrtSHP@data$v, width = 2, format = "d", flag = "0"), sep="")
inpSHP=SpatialPoints(coords=data.frame(inpDF$lon, inpDF$lat), proj4string=wgs84crs)
selRows=gContains(mrtSHP, inpSHP, byid=TRUE)
selHVs=unique(mrtSHP@data[which(selRows),"hv"])
### get directories
mainDir=paste("http://e4ftl01.cr.usgs.gov/MOLT/",dataType,"/",sep="")
temporalDirs=paste(mainDir,extractFolders(mainDir),"/",sep="")
dlDir=c(expJpgDir,expMetaDir,wDir)
#### Main processing
### download files
xmlPaths=c()
jpgPaths=c()
for (currDirPos in seq_along(temporalDirs)[1:5]) {
cat(paste("Starting time increment ",currDirPos,"out of",length(temporalDirs),"\n"))
# get files
allFiles=extractFiles(temporalDirs[currDirPos], selHVs)
inFiles=list(jpg=allFiles[[1]], xml=allFiles[[2]], hdf=allFiles[[3]])
outFiles=list()
# download files
cat("\tDownloading files..\n")
for (currFileTypePos in seq_len(3)) {
outFiles[[currFileTypePos]]=gsub(".", "_", basename(inFiles[[currFileTypePos]]), fixed=TRUE)
outFiles[[currFileTypePos]]=paste(dlDir[[currFileTypePos]],"\\",gsub(paste("_", names(inFiles)[currFileTypePos], sep=""), paste(".", names(inFiles)[currFileTypePos], sep=""), outFiles[[currFileTypePos]], fixed=TRUE),sep="")
download.file(url=paste(temporalDirs[currDirPos],inFiles[[currFileTypePos]],sep="")[1], destfile=outFiles[[currFileTypePos]][1], mode="wb", quiet=TRUE)
}
jpgPaths=c(jpgPaths,outFiles[[1]][1])
xmlPaths=c(xmlPaths,outFiles[[2]])
### reformat and project .hdf files
cat("\tReformatting and projecting files..\n")
pb=txtProgressBar(0, length(inFiles[[3]]), 0 ,style=3)
for (currFilePos in seq_along(inFiles[[3]])) {
# generate prm file
inFile=outFiles[[3]][[currFilePos]]
outFile=paste(expTifDir, "\\", gsub(".hdf", ".tif", basename(inFile)), sep="")
prjFile=gsub(".tif", ".prj", outFile)
generatePRM(inFile, outFile)
# execute modis resample tool
system(paste(mrtPath,"\\resample.exe -p ",wDir,"\\mrt.prm",sep=""), show.output.on.console=FALSE)
# copy projection file
file.copy (esriPrjFile, prjFile, overwrite=TRUE)
# update progress bar
setTxtProgressBar(pb, currFilePos)
}
close(pb)
}
### merge all metadatafiles into a single csv
cat("Generating metadata file..\n")
mergedMDF=parseXML2DF(xmlPaths[1])
pb2=txtProgressBar(0, length(xmlPaths), 1, style=3)
counter=1
for (currXml in xmlPaths[-1]) {
# load xml file
currDF=parseXML2DF(currXml)
# add parsed file to df
mergedMDF=rbind_cust(mergedMDF, currDF)
# update progress bar
counter=counter+1
setTxtProgressBar(pb2, counter)
}
close(pb2)
# add in image preview links for use in excel
mergedMDF$preview=paste('=HYPERLINK("',jpgPaths,'", "LINK")', sep="")
# save complete metadata file
write.table(mergedMDF, expMetaPath, sep="\t", row.names=FALSE, quote=FALSE)
# print finishing message
cat("DONE!\n")