---
title: "Plot Reduce Diagnostic Report (`r jobslug`)"
author: "SilviaTerra, LLC"
date: "`r as.character(format(Sys.time(), format = '%B %d %Y %X'))`"
output: html_document
---
```{r setup, include = FALSE}
# Attach the modeling, plotting, and parallel-processing packages used
# throughout this report.
library(boot)
library(dplyr)
library(doParallel)
library(ggplot2)
library(glmnet)
library(lme4)
library(nnet)
library(parallel)
library(pander)
library(randomForest)
library(tidyr)
library(tsalgo)
library(tsutils)
library(stheight)
library(stvolume)
library(rstanarm)
# Leave two cores free for the OS while Stan samples chains in parallel.
options(mc.cores = parallel::detectCores() - 2)
library(bayesplot)

# Global knitr chunk options: hide code, cache results, emit PNG figures.
# figureDir and cacheDir are expected to be defined by the calling
# environment before this report is rendered.
opts_chunk$set(echo = FALSE,
               message = FALSE,
               warning = FALSE,
               error = FALSE,
               cache = TRUE,
               dev = 'png',
               fig.path = figureDir,
               cache.path = cacheDir,
               fig.width = 10,
               fig.height = 10,
               results = 'asis',
               comment = NA)
options(show.signif.stars = FALSE)
```
```{r debug, eval = FALSE}
# Manual debugging setup: run interactively to pull the job inputs.
# Never evaluated during a normal render (eval = FALSE).
jobslug <- 'hfm-2018-f1-cc'
get_prepped(jobslug, setup = TRUE)
groupSize <- 5
```
```{r dataPrep, include = FALSE}
# Read cruise plot records; keep a copy filtered to non-empty records with a
# measured diameter (the data are filtered two different ways downstream).
cruiseData <- read_cruiseDat(cruiseJson)
cruiseDataNoZero <- cruiseData %>%
  filter(num_records > 0) %>%
  filter(!is.na(diameter))

trainingData <- read_td(trainingFile)
trainingData$tph <- as.integer(trainingData$tph)

# ---- Define predictor column groups for the models ----
distroVarsIDX <- grep("diam", x = names(trainingData))
impVarsIDX <- grep("spp", x = names(trainingData))
impVarsIDX <- impVarsIDX[!grepl('meta', names(trainingData)[impVarsIDX])]
pcScoresIDXRaw <- grep('pca', x = names(trainingData))
nonPcas <- grep('nonpca', x = names(trainingData))
pcScoresIDX <- pcScoresIDXRaw[pcScoresIDXRaw %nin% nonPcas]
# seq_along() is safe even if no PCA columns are found (1:length would
# produce c(1, 0) in that case).
defaultPredictors <- paste0('pca', seq_along(pcScoresIDX), collapse = '+')

# Pull in the HFM strata table
system('s3cmd get s3://silviaterra-sequoia/hfm-2018/hfm-strata-tab.csv /tmp/')
stratTab <- read.csv('/tmp/hfm-strata-tab.csv')

# Modify **AND RUN** to define properly!
transform_the_predictors <- function(pixeldataframe) {
  # Add any necessary transformations to the X variables, for example:
  #   pixeldataframe$pca1cubed <- pixeldataframe$pca1 ^ 3
  # This is also a good place to deal with factors that will be used
  # in bespoke models.
  pixeldataframe <- left_join(pixeldataframe, stratTab,
                              by = c('meta_Strata' = 'Strata')) %>%
    mutate(meta_megaStrata = megaStrata) %>%
    select(-CruiseType, -ForType, -megaStrata)
  return(pixeldataframe)
}
trainingData <- transform_the_predictors(trainingData)

# Check metadata for factor levels not present in the training data;
# prediction would fail on strata the models never saw.
system(paste0('s3cmd get --force s3://silviaterra-sequoia/', jobslug,
              '/setup/bounds/metadata.csv /tmp/metadata.csv'))
metadata <- read.csv('/tmp/metadata.csv')
tdStrata <- unique(trainingData$meta_megaStrata)
allStrata <- metadata %>%
  left_join(stratTab) %>%
  select(megaStrata) %>%
  distinct() %>%
  .$megaStrata
if (!all(as.character(allStrata) %in% as.character(tdStrata))) {
  stop('there are strata in the metadata that are not present in training data')
}

# Pooled model formulas. The +1 on the response keeps the log link defined
# for zero plots (the prediction transforms subtract it back out).
stemsPooledFormula <- as.formula(paste('tph + 1 ~ ', defaultPredictors))
baPooledFormula <- as.formula(paste('baph + 1 ~ ', defaultPredictors))

# Sanity-check the requested group size against plots-per-stand.
sizeTest <- trainingData %>%
  dplyr::group_by(meta_stand_id) %>%
  dplyr::summarize(nPlots = n()) %>%
  dplyr::ungroup()
if (groupSize > max(sizeTest$nPlots)) {
  stop("Group size is larger than the maximum number of plots in any stand")
}
if (groupSize < 4) {
  stop("Group size is less than 4. What are you thinking?")
}

# Attach the cruiser name to each plot, and flag zero (empty) plots by their
# 'zero' cruiser tag.
fullTrainingData <- trainingData %>%
  mutate(cruiser = cruiseData$cruiser[match(plot_id, cruiseData$plot_id)])
zeroPlots <- fullTrainingData$plot_id[grep('zero', fullTrainingData$cruiser)]
```
- ----------------------
- Basal area models
- ----------------------
- ### BAPH POOLED MODEL
```{r baphPooled}
# Deprecated Stan mean model until we figure out a permanent solution:
# baphPooled <- stan_glm(baPooledFormula, data = fullTrainingData,
#                        family = Gamma(link = 'log'))

# Previous default pooled form - currently used for mean estimates.
baphPooledDefault <- glm(baPooledFormula,
                         data = fullTrainingData,
                         family = gaussian(link = 'log'),
                         control = list(maxit = 1e3))

# Currently being used to get precision estimates.
baphPrecisionDefault <- stan_glm(baPooledFormula,
                                 data = fullTrainingData,
                                 family = gaussian(link = 'identity'))

# Models using plot inclusion probabilities (kept for reference):
# totalAcres <- sum(metadata$stand_acreage)
# cruisedStands <- unique(fullTrainingData$meta_stand_id)
# cruiseWeights <- metadata %>%
#   filter(stand_id %in% cruisedStands) %>%
#   mutate(weight = stand_acreage / totalAcres) %>%
#   select(stand_id, weight)
#
# fullTrainingData <- left_join(fullTrainingData, stand_weights,
#                               by = c('meta_stand_id' = 'stand_id'))
#
# # currently being used to get precision estimates
# baphPooledIdent <- stan_glm(baPooledFormula, data = fullTrainingData,
#                             weights = fullTrainingData$weight,
#                             family = gaussian(link = 'identity'))
#
# # previous default pooled form - currently used for mean estimates
# baphGlm <- glm(baPooledFormula, data = fullTrainingData,
#                weights = fullTrainingData$weight,
#                family = gaussian(link = 'log'),
#                control = list(maxit = 1e3))
```
```{r baphBespoke}
# ---------------------------------------------------------------------------
### BEGIN BESPOKE FITTING - fit by hand below
# ---------------------------------------------------------------------------
# Drop the pca9 term from the default predictor string. Splitting on '+' and
# rebuilding avoids the regex pitfall of gsub('pca9+', ...), which left a
# dangling '+' separator behind (and would leave a trailing '+' if pca9 were
# the last term).
bespokePredictors <- paste(
  setdiff(strsplit(defaultPredictors, '+', fixed = TRUE)[[1]], 'pca9'),
  collapse = '+')
baBespokeFormula <- as.formula(paste('baph + 1 ~ ', bespokePredictors))

# Previous default pooled form - currently used for mean estimates.
baphBespoke <- glm(baBespokeFormula, data = fullTrainingData,
                   family = gaussian(link = 'log'))
# Currently being used to get precision estimates.
baphBespokePrecision <- stan_glm(baBespokeFormula, data = fullTrainingData,
                                 family = gaussian(link = 'identity'),
                                 adapt_delta = 0.99)

# Prefer the bespoke fits when they exist; otherwise fall back to defaults.
# (Comment out the bespoke fits above to use the defaults.)
if (exists('baphBespoke')) {
  baphPooled <- baphBespoke
  baphPrecision <- baphBespokePrecision
} else {
  baphPooled <- baphPooledDefault
  baphPrecision <- baphPrecisionDefault
}

transform_the_baph_prediction <- function(predicted) {
  # Back-transform a prediction from the default model
  # glm(family = gaussian(link = 'log')) fit to (baph + 1):
  # exp() undoes the log link and -1 undoes the +1 response offset.
  backtransformed <- exp(predicted) - 1
  # backtransformed <- predicted - 1  # Change this line as necessary!
  return(backtransformed)
}

# Side-by-side coefficient table for the mean (glm) and precision (Stan) fits.
baphCoeffTable <- bind_cols(data.frame(baphPooled$coefficients),
                            data.frame(baphPrecision$coefficients)) %>%
  mutate(coefficient = row.names(data.frame(baphPooled$coefficients)))

# Posterior predictive draws, shifted back to the baph scale (the response
# was baph + 1, so the offset is subtracted exactly once, here).
baphSims <- posterior_predict(baphPrecision) - 1
baphMeans <- apply(baphSims, 1, mean)
# View baphMeans
hist(baphMeans)
# View relative precision of baph
sd(baphMeans) / mean(baphMeans)
baphSD <- apply(baphSims, 2, function(x) {sd(x)})

baphPredFrame <- data.frame(plot_id = fullTrainingData$plot_id,
                            megaStrata = fullTrainingData$meta_megaStrata,
                            observed = fullTrainingData$baph)
# baphSims is already on the baph scale; subtracting 1 again here (as the
# old code did) double-counted the offset and biased the Stan predictions
# low by one unit.
baphPredFrame$stanPred <- colMeans(baphSims)
baphPredFrame$glmPred <- exp(predict(baphPooled)) - 1

baphPlotFrame <- baphPredFrame %>%
  gather(source, predicted, -plot_id, -observed, -megaStrata) %>%
  mutate(resid = predicted - observed)

baphRmseStan <- baphPlotFrame %>%
  filter(source == 'stanPred') %>%
  summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  .$rmse
baphRmseGlm <- baphPlotFrame %>%
  filter(source == 'glmPred') %>%
  summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  .$rmse

#####
# CHECK MODEL CONVERGENCE
#####
# Print a table of parameter means, summary stats, and convergence diagnostics.
pander(data.frame(summary(baphPrecision)))
# Ideally, all Rhats are < 1.1 and all Neff ratios are > 0.25.
plot(baphPrecision, 'rhat')
# Ratio of effective sample size over total iterations drawn.
plot(baphPrecision, 'neff_hist')

# Predicted vs. observed
ggplot(data = baphPlotFrame, aes(x = observed, y = predicted, col = megaStrata)) +
  geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 1) +
  theme_bw() + facet_wrap(~source)
# Observed vs. residuals
ggplot(data = baphPlotFrame, aes(x = observed, y = resid, col = megaStrata)) +
  geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 0) +
  theme_bw() + facet_wrap(~source)

# Posterior predictive checks: compare the distribution of observed baph (y)
# to multiple simulated datasets from the model (yrep).
pp_check(baphPrecision) + ggtitle('baph') +
  ggtitle('We are not currently using this model.')
# ^^^ we aren't currently using this model
# Posterior predictive tests of the mean, upper, and lower quantiles.
ppc_stat(trainingData$baph, baphSims, stat = "mean") + ggtitle('Posterior predictive test of sample mean')
ppc_stat(trainingData$baph, baphSims, stat = function(x) {quantile(x, 0.95)}) + ggtitle('Posterior predictive test of upper (95%) quantile')
ppc_stat(trainingData$baph, baphSims, stat = function(x) {quantile(x, 0.05)}) + ggtitle('Posterior predictive test of lower (5%) quantile')
# Posterior predictive intervals of each plot from baphPrecision (sims are
# already on the baph scale - no further offset subtraction).
baphStanFrame <- baphPlotFrame %>% filter(source == 'stanPred')
ppc_intervals(colMeans(baphSims), baphSims, x = baphStanFrame$observed)

# TODO figure out why gaussian(link = 'log') gives bogus results
# TODO look into different default priors (rather than out of box)
#      e.g. lasso priors
# TODO look into QR transforming the predictors
# TODO incorporate plot inclusion probability into observation weights
```
__baph default pooled GLM model RMSE is
`r round(baphRmseGlm, digits = 2)` m2 per hectare from the model.__
- ----------------------
- Stem density models
- ----------------------
- ### TPH POOLED MODEL
```{r tphPooled}
# Deprecated Stan mean model until we figure out a permanent solution:
# tphPooledDefault <- stan_glm(stemsPooledFormula, data = fullTrainingData,
#                              family = poisson(link = 'log'))

# Previous default pooled form - currently used for mean estimates.
tphPooledDefault <- glm(stemsPooledFormula,
                        data = fullTrainingData,
                        family = gaussian(link = 'log'),
                        control = list(maxit = 1e3))

# Currently being used to get precision estimates.
tphPrecisionDefault <- stan_glm(stemsPooledFormula,
                                data = fullTrainingData,
                                family = gaussian(link = 'identity'))

# Models using plot inclusion probabilities (kept for reference):
# # currently being used to get precision estimates
# tphPooledIdent <- stan_glm(stemsPooledFormula, data = fullTrainingData,
#                            weights = fullTrainingData$weight,
#                            family = gaussian(link = 'identity'))
#
# # previous default pooled form - currently used for mean estimates
# tphGlm <- glm(stemsPooledFormula, data = fullTrainingData,
#               weights = fullTrainingData$weight,
#               family = gaussian(link = 'log'),
#               control = list(maxit = 1e3))
```
```{r tphBespoke}
# ---------------------------------------------------------------------------
### BEGIN BESPOKE FITTING - fit by hand below
# ---------------------------------------------------------------------------
# Reuses bespokePredictors (the default predictors minus pca9) defined in the
# baphBespoke chunk.
stemsBespokeFormula <- as.formula(paste('tph + 1 ~ ', bespokePredictors))

# Previous default pooled form - currently used for mean estimates.
tphBespoke <- glm(stemsBespokeFormula, data = fullTrainingData,
                  family = gaussian(link = 'log'))
# Currently being used to get precision estimates.
tphBespokePrecision <- stan_glm(stemsBespokeFormula, data = fullTrainingData,
                                family = gaussian(link = 'identity'),
                                adapt_delta = 0.99)

# Prefer the bespoke fits when they exist; otherwise fall back to defaults.
# (Comment out the bespoke fits above to use the defaults.)
if (exists('tphBespoke')) {
  tphPooled <- tphBespoke
  tphPrecision <- tphBespokePrecision
} else {
  tphPooled <- tphPooledDefault
  tphPrecision <- tphPrecisionDefault
}

transform_the_tph_prediction <- function(predicted) {
  # Back-transform a prediction from the default model
  # glm(family = gaussian(link = 'log')) fit to (tph + 1):
  # exp() undoes the log link and -1 undoes the +1 response offset.
  backtransformed <- exp(predicted) - 1
  # backtransformed <- predicted - 1  # Change this line as necessary!
  return(backtransformed)
}

# Side-by-side coefficient table for the mean (glm) and precision (Stan) fits.
tphCoeffTable <- bind_cols(data.frame(tphPooled$coefficients),
                           data.frame(tphPrecision$coefficients)) %>%
  mutate(coefficient = row.names(data.frame(tphPooled$coefficients)))

# Posterior predictive draws, shifted back to the tph scale (the response was
# tph + 1, so the offset is subtracted exactly once, here).
tphSims <- posterior_predict(tphPrecision) - 1
tphMeans <- apply(tphSims, 1, mean)
# View tphMeans
hist(tphMeans)
# View relative precision of tph
sd(tphMeans) / mean(tphMeans)
tphSD <- apply(tphSims, 2, function(x) {sd(x)})

tphPredFrame <- data.frame(plot_id = fullTrainingData$plot_id,
                           megaStrata = fullTrainingData$meta_megaStrata,
                           observed = fullTrainingData$tph)
# tphSims is already on the tph scale; subtracting 1 again here (as the old
# code did) double-counted the offset and biased the Stan predictions low by
# one stem.
tphPredFrame$stanPred <- colMeans(tphSims)
tphPredFrame$glmPred <- exp(predict(tphPooled)) - 1

tphPlotFrame <- tphPredFrame %>%
  gather(source, predicted, -plot_id, -observed, -megaStrata) %>%
  mutate(resid = predicted - observed)

tphRmseStan <- tphPlotFrame %>%
  filter(source == 'stanPred') %>%
  summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  .$rmse
tphRmseGlm <- tphPlotFrame %>%
  filter(source == 'glmPred') %>%
  summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  .$rmse

#####
# CHECK MODEL CONVERGENCE
#####
# Print a table of parameter means, summary stats, and convergence diagnostics.
pander(data.frame(summary(tphPrecision)))
# Ideally, all Rhats are < 1.1 and all Neff ratios are > 0.25.
plot(tphPrecision, 'rhat')
# Ratio of effective sample size over total iterations drawn.
plot(tphPrecision, 'neff_hist')

# Predicted vs. observed
ggplot(data = tphPlotFrame, aes(x = observed, y = predicted, col = megaStrata)) +
  geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 1) +
  theme_bw() + facet_wrap(~source)
# Observed vs. residuals
ggplot(data = tphPlotFrame, aes(x = observed, y = resid, color = megaStrata)) +
  geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 0) +
  theme_bw() + facet_wrap(~source)

# Posterior predictive checks: compare the distribution of observed tph (y)
# to multiple simulated datasets from the model (yrep).
pp_check(tphPrecision) + ggtitle('tph - we are not currently using this model')
# ^^^ we aren't currently using this model
# Posterior predictive tests of the mean, upper, and lower quantiles.
ppc_stat(trainingData$tph, tphSims, stat = "mean") + ggtitle('Posterior predictive test of sample mean')
ppc_stat(trainingData$tph, tphSims, stat = function(x) {quantile(x, 0.95)}) + ggtitle('Posterior predictive test of upper (95%) quantile')
ppc_stat(trainingData$tph, tphSims, stat = function(x) {quantile(x, 0.05)}) + ggtitle('Posterior predictive test of lower (5%) quantile')
# Posterior predictive intervals of each plot from tphPrecision (sims are
# already on the tph scale - no further offset subtraction).
tphStanFrame <- tphPlotFrame %>% filter(source == 'stanPred')
ppc_intervals(colMeans(tphSims), tphSims, x = tphStanFrame$observed)

# TODO figure out why gaussian(link = 'log') gives bogus results
# TODO look into different default priors (rather than out of box)
#      e.g. lasso priors
# TODO look into QR transforming the predictors
# TODO incorporate plot inclusion probability into observation weights
```
- __tph default pooled GLM model RMSE is
- `r round(tphRmseGlm, digits = 2)` stems per hectare from the model.__
```{r defineCaps}
# Cap pooled-model predictions at the 99th percentile of the observed
# training values plus one standard deviation.
pooled_baphCap <- quantile(trainingData$baph, 0.99) + sd(trainingData$baph)
pooled_tphCap <- quantile(trainingData$tph, 0.99) + sd(trainingData$tph)
```
- The baph cap is set at `r round(pooled_baphCap, 1) ` for pooled models.
- The tph cap is set at `r round(pooled_tphCap, 1)` for pooled models.
```{r ModelAssistedRatios}
# Attach the pooled GLM predictions to the training data for ratio estimation.
trainingData$baphPred <- baphPredFrame$glmPred
trainingData$tphPred <- tphPredFrame$glmPred

## Can manually change the nPlots filter here if needed
# Model-assisted stand-level ratios
standRatVals <- trainingData %>%
  filter(plot_id %nin% zeroPlots) %>%
  group_by(meta_stand_id) %>%
  summarize(baphObsMean = mean(baph),
            tphObsMean = mean(tph),
            baphPredMean = mean(baphPred),
            tphPredMean = mean(tphPred),
            nPlots = n()) %>%
  mutate(tphRat = tphObsMean / tphPredMean,
         baphRat = baphObsMean / baphPredMean) %>%
  filter(nPlots >= 5)

# Model-assisted strata-level ratios.
# Define the grouping variable in metadata (without the meta_ prefix);
# if there is no grouping structure, set to NULL.
strataVar <- 'megaStrata'
if (is.null(strataVar)) {
  # No grouping variable: compute population-level ratios only.
  strataRatios <- trainingData %>%
    filter(plot_id %nin% zeroPlots) %>%
    mutate(strataVar = 'population') %>%
    group_by(strataVar) %>%
    summarize(baphObsMean = mean(baph),
              tphObsMean = mean(tph),
              baphPredMean = mean(baphPred),
              tphPredMean = mean(tphPred),
              nPlots = n()) %>%
    mutate(tphRat = tphObsMean / tphPredMean,
           baphRat = baphObsMean / baphPredMean)
} else {
  trainingData$strataVar <- trainingData[, paste0('meta_', strataVar)]
  # Stand -> stratum lookup, reused by the later ratio chunks.
  strataTab <- trainingData %>%
    select(meta_stand_id, strataVar) %>%
    distinct()
  strataRatios <- trainingData %>%
    filter(plot_id %nin% zeroPlots) %>%
    group_by(strataVar) %>%
    summarize(baphObsMean = mean(baph),
              tphObsMean = mean(tph),
              baphPredMean = mean(baphPred),
              tphPredMean = mean(tphPred),
              nPlots = n()) %>%
    mutate(tphRat = tphObsMean / tphPredMean,
           baphRat = baphObsMean / baphPredMean)
}

# Model-assisted population-level ratios
populationRatios <- trainingData %>%
  filter(plot_id %nin% zeroPlots) %>%
  summarize(baphObsMean = mean(baph),
            tphObsMean = mean(tph),
            baphPredMean = mean(baphPred),
            tphPredMean = mean(tphPred),
            nPlots = n()) %>%
  mutate(tphRat = tphObsMean / tphPredMean,
         baphRat = baphObsMean / baphPredMean)

# Per-stand variance of the model-assisted (MA, residual-based) and
# design-based (DB, observation-based) estimators.
varianceTable <- trainingData %>%
  filter(plot_id %nin% zeroPlots) %>%
  filter(meta_stand_id %in% standRatVals$meta_stand_id) %>%
  mutate(BAPH_resid = baphPred - baph,
         TPH_resid = tphPred - tph) %>%
  group_by(meta_stand_id) %>%
  mutate(meanBAPH_resid = mean(BAPH_resid),
         meanTPH_resid = mean(TPH_resid)) %>%
  ungroup() %>%
  mutate(BAPH_errsquared = (BAPH_resid - meanBAPH_resid) ^ 2,
         TPH_errsquared = (TPH_resid - meanTPH_resid) ^ 2) %>%
  group_by(meta_stand_id) %>%
  left_join(., standRatVals) %>%
  summarize(n = length(tph),
            varBAPH_MA = sum(BAPH_errsquared) / (length(baph) - 1),
            varTPH_MA = sum(TPH_errsquared) / (length(tph) - 1),
            varBAPH_DB = sum((baph[plot_id %nin% zeroPlots] -
                                unique(baphObsMean)) ^ 2) /
              (length(baph[plot_id %nin% zeroPlots]) - 1),
            varTPH_DB = sum((tph[plot_id %nin% zeroPlots] -
                               unique(tphObsMean)) ^ 2) /
              (length(tph[plot_id %nin% zeroPlots]) - 1)) %>%
  select(meta_stand_id, n,
         varBAPH_DB, varTPH_DB,
         varBAPH_MA, varTPH_MA, n) %>%
  ungroup()

# NOTE(review): one stand is excluded by hand here - confirm this exclusion
# is still intended for the current job.
standRatios <- left_join(standRatVals, varianceTable) %>%
  filter(meta_stand_id != 'OC0201-2725')

# Model-assisted population confidence intervals, acreage-weighted across
# stands.
metJoin <- trainingData %>%
  select(meta_stand_id, meta_stand_acre) %>%
  distinct()
maCIs <- varianceTable %>%
  mutate(baphMaSe = sqrt(varBAPH_MA / n),
         tphMaSe = sqrt(varTPH_MA / n)) %>%
  mutate(bapaMaSe = convert_baph_to_bapa(baphMaSe),
         tpaMaSe = tphMaSe / 2.47) %>%
  select(meta_stand_id, bapaMaSe, tpaMaSe, n) %>%
  left_join(metJoin) %>%
  summarize(bapaMaSePop = sqrt(
              sum((meta_stand_acre / sum(meta_stand_acre)) ^ 2 * bapaMaSe ^ 2)
            ),
            tpaMaSePop = sqrt(
              sum((meta_stand_acre / sum(meta_stand_acre)) ^ 2 * tpaMaSe ^ 2)
            ),
            nStands = n(), nPlots = sum(n)) %>%
  # 90% CIs via the t distribution.
  mutate(bapaCI = qt(1 - 0.1 / 2, df = nPlots - nStands) * bapaMaSePop,
         tpaCI = qt(1 - 0.1 / 2, df = nPlots - nStands) * tpaMaSePop)
pander(maCIs)
```
- --------------------------------
- Diameter distribution models
- --------------------------------
```{r distroFit}
# Necessary to register parallel clusters by hand.
# NOTE(review): this fork cluster is never stopped in the report - consider
# stopCluster(cl) once the parallel work is finished.
cl <- makeForkCluster(detectCores())
registerDoParallel(cl)

# Fit a regression random forest for the N-th diameter-class response.
#   N        - column index into ys
#   ys       - responses (data.frame/matrix, or a single vector)
#   predVars - predictor columns
genericModelBuild <- function(N, ys, predVars, ...) {
  if (is.null(ncol(ys))) { # in case only one spp.
    Gpsi <- ys
  } else {
    Gpsi <- ys[, N]
  }
  dModelObject <- cbind(data.frame(y = Gpsi), predVars)
  GModelRF <- randomForest(y ~ .,
                           data = dModelObject,
                           ntree = 50, keep.forest = TRUE,
                           type = 'regression')
  return(GModelRF)
}

# Drop plots with missing distro responses and zero tph before fitting.
dist_to_drop <- which(is.na(fullTrainingData[, distroVarsIDX[1]]))
if (length(dist_to_drop) > 0) {
  fullTrainingDataNoNA <- fullTrainingData[-dist_to_drop, ] %>%
    filter(tph > 0)
} else {
  fullTrainingDataNoNA <- fullTrainingData %>%
    filter(tph > 0)
}

# Default models: one forest per diameter class on the PCA scores.
distroDefault <- lapply(seq_along(distroVarsIDX), genericModelBuild,
                        ys = fullTrainingDataNoNA[, distroVarsIDX],
                        predVars = fullTrainingDataNoNA[, pcScoresIDX])

distroBespokePredictors <- NULL
# ------------------------------------------------------------------
### Bespoke diameter distribution example
# ------------------------------------------------------------------
# Bespoke models: PCA scores plus megaStrata, excluding pca9.
distroBespokePredictors <- c(names(fullTrainingDataNoNA)[pcScoresIDX],
                             'meta_megaStrata')
distroBespokePredictors <- distroBespokePredictors[distroBespokePredictors != 'pca9']
distroBespoke <- lapply(seq_along(distroVarsIDX), genericModelBuild,
                        ys = fullTrainingDataNoNA[, distroVarsIDX],
                        predVars = fullTrainingDataNoNA[, distroBespokePredictors])
# -----------------------------------------------------------------

# Prefer the bespoke fits when they exist; otherwise use the defaults.
if (exists('distroBespoke')) {
  distroSet <- distroBespoke
} else {
  distroSet <- distroDefault
}
names(distroSet) <- names(fullTrainingDataNoNA)[distroVarsIDX]
```
```{r distroDiagnostics}
# One row per plot x diameter class: aggregate RF prediction, observed value,
# and the per-tree SD of each prediction. bind_rows(lapply(...)) avoids the
# O(n^2) grow-by-rbind loop of the original.
distroDF <- bind_rows(lapply(seq_along(distroSet), function(i) {
  tmpPred <- predict(distroSet[[i]], newdata = fullTrainingDataNoNA,
                     predict.all = TRUE)
  tempDF <- data.frame(predicted = tmpPred$aggregate)
  tempDF$observed <- fullTrainingDataNoNA[, distroVarsIDX[i]]
  tempDF$meta_stand_id <- fullTrainingDataNoNA$meta_stand_id
  tempDF$plot_id <- fullTrainingDataNoNA$plot_id
  tempDF$meta_megaStrata <- fullTrainingDataNoNA$meta_megaStrata
  # Distro columns look like '<prefix>_<class>'; keep the numeric class.
  dbhClass <- names(fullTrainingDataNoNA)[distroVarsIDX][i]
  tempDF$dbhClass <- as.numeric(strsplit(dbhClass, split = '_')[[1]][2])
  # Spread of the individual-tree predictions = per-plot precision.
  tempDF$SD <- apply(tmpPred$individual, 1, function(x) {sd(x)})
  tempDF
}))

# Normalize the predicted distro for each plot (so all distro scores sum to
# one per plot); negative RF predictions are clipped to zero first.
normalized <- distroDF %>%
  select(plot_id, dbhClass, predicted) %>%
  group_by(plot_id) %>%
  mutate(predicted = ifelse(predicted < 0, 0, predicted)) %>%
  mutate(predNorm = predicted / sum(predicted)) %>%
  select(plot_id, dbhClass, predNorm)
distroDF <- distroDF %>%
  left_join(normalized)

# Predicted vs. observed, faceted by diameter class.
distFit <- ggplot(distroDF, aes(x = observed, y = predicted, color = meta_megaStrata)) +
  facet_wrap(~ dbhClass) + geom_point() + theme_bw() +
  geom_abline(intercept = 0, slope = 1)
print(distFit)

# Observed and predicted distributions side by side per diameter class.
DiaDataTidy <- distroDF %>%
  gather(key = variable, value = value, -dbhClass, -plot_id, -meta_stand_id,
         -meta_megaStrata, -SD, -predNorm)
distPlot <- ggplot(DiaDataTidy,
                   aes(x = factor(dbhClass),
                       y = value,
                       color = variable)) +
  geom_boxplot(position = position_dodge(0.5), width = 1) +
  facet_wrap(~variable) + theme_bw() + xlab("Diameter class") +
  ylim(0, 1) + ylab("Trees per hectare") +
  scale_color_manual("", values = c("darkblue", "orange"))
print(distPlot)

# Box plot of average precision (standard deviation) per diameter class.
distroSDPlot <- ggplot(distroDF, aes(x = factor(dbhClass), y = SD)) + geom_boxplot() +
  theme_bw() + xlab('diameter class') + ylab('standard deviation')
print(distroSDPlot)
```
```{r distroRatios, include = FALSE}
# Observed/predicted ratios of diameter-class means, used downstream to
# calibrate the distro predictions. NA ratios (zero predicted mean) fall
# back to 1.
if (is.null(strataVar)) {
  strataDistroRatios <- distroDF %>%
    filter(as.character(plot_id) %nin% zeroPlots) %>%
    mutate(strataVar = 'population') %>%
    group_by(strataVar, dbhClass) %>%
    summarize(meanPred = mean(predicted),
              meanObs = mean(observed)) %>%
    mutate(ratio = meanObs / meanPred) %>%
    mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
    select(DClass = dbhClass, meanPred, meanObs, rat = ratio)
} else {
  strataDistroRatios <- distroDF %>%
    filter(as.character(plot_id) %nin% zeroPlots) %>%
    left_join(strataTab) %>%
    group_by(strataVar, dbhClass) %>%
    summarize(meanPred = mean(predicted),
              meanObs = mean(observed)) %>%
    mutate(ratio = meanObs / meanPred) %>%
    mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
    select(strataVar, DClass = dbhClass, meanPred, meanObs, rat = ratio)
}

# Model-assisted population level distro ratios
populationDistroRatios <- distroDF %>%
  filter(as.character(plot_id) %nin% zeroPlots) %>%
  group_by(dbhClass) %>%
  summarize(meanPred = mean(predicted),
            meanObs = mean(observed)) %>%
  mutate(ratio = meanObs / meanPred) %>%
  mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  select(DClass = dbhClass, meanPred, meanObs, rat = ratio)

# Model-assisted stand-level distro ratios (only for stands kept in
# standRatios).
standDistroRatios <- distroDF %>%
  filter(as.character(plot_id) %nin% zeroPlots) %>%
  filter(meta_stand_id %in% unique(standRatios$meta_stand_id)) %>%
  group_by(meta_stand_id, dbhClass) %>%
  summarize(meanPred = mean(predicted),
            meanObs = mean(observed)) %>%
  mutate(ratio = meanObs / meanPred) %>%
  select(stand = meta_stand_id, DClass = dbhClass, meanPred,
         meanObs, rat = ratio)

# Experimental Weibull diameter model (kept for reference):
# obsDiams <- cruiseData$diameter[!is.na(cruiseData$diameter)]
# N <- length(obsDiams)
# N_pred <- length(obsDiams)
#
# weiFit <- stan(file = '/tmp/weibull-dist.stan', data = stanData)
#
# diamSims <- extract(weiFit, 'y_pred')$y_pred
```
- --------------------------
- Importance Value Models
- --------------------------
```{r importanceFit}
# Species importance columns (spp_*), excluding metadata columns.
speciesCols <- trainingData %>%
  select(contains('spp'), -contains('meta'))
speciesLabs <- names(speciesCols)

# Bespoke predictors: PCA scores plus megaStrata, excluding pca9.
impBespokePredictors <- c(names(trainingData)[pcScoresIDX], 'meta_megaStrata')
impBespokePredictors <- impBespokePredictors[impBespokePredictors %nin% c('pca9')]

# Fit a two-stage model for one species:
#   1) a presence/absence model (stan_glm / glm / randomForest depending on
#      how balanced presence is) plus a presence threshold;
#   2) an importance model fit only to plots where the species is present.
#
#   spp               - species column name in td
#   td                - training data
#   threshold         - optional presence threshold; overrides the
#                       data-driven quantile threshold when supplied
#   pcScoresIDX       - currently unused; kept for call compatibility
#   bespokePredictors - predictors in addition to pcas
#   stan              - use stan models for presence/absence?
# Returns a list: spp, threshold, presenceMod, impMod, predictors.
fit_spp_models <- function(spp,
                           td,
                           threshold = NULL,
                           pcScoresIDX,
                           bespokePredictors = NULL,
                           stan = FALSE) {
  predictors <- bespokePredictors
  training <- td %>%
    filter(baph > 0) %>%
    select(y = spp, one_of(predictors)) %>%
    mutate(present = ifelse(y > 0, 1, 0))
  # Proportion of plots with / without the species.
  occTab <- training %>%
    group_by(present) %>%
    summarize(n = n()) %>%
    mutate(prop = n / sum(n))
  # Sometimes a species is on every plot, or on no plots (groupstrap);
  # a presence/absence model is not necessary in these cases.
  if (nrow(occTab) == 1) {
    outcome <- occTab$present
    presenceMod <- lm(outcome ~ 1)
    training$predProp <- predict(presenceMod, newdata = training)
    threshold <- 1
  } else {
    presentProp <- occTab %>%
      filter(present == 1) %>%
      .$prop
    presentFormula <- as.formula(paste('present ~ ',
                                       paste0(predictors, collapse = '+')))
    # If the species is neither present nor absent on more than 95% of
    # plots, fit a statistical model for presence/absence.
    if (all(occTab$prop <= 0.95)) {
      if (stan) {
        # Weakly informative t priors on the logistic coefficients.
        tPrior <- student_t(df = 7, location = 0, scale = 2.5)
        presenceMod <- stan_glm(presentFormula, data = training,
                                family = binomial(link = 'logit'),
                                prior = tPrior, prior_intercept = tPrior,
                                cores = detectCores() - 4)
        # Calculate predProp from the posterior draws.
        presentPred <- posterior_predict(presenceMod)
        training$predProp <- unname(round(colMeans(presentPred), 3))
      } else { # otherwise we will just use glm
        presenceMod <- glm(presentFormula, data = training,
                           family = 'binomial')
        training$predProp <- predict(presenceMod, type = 'response')
      }
    } else { # highly imbalanced presence/absence: use random forest
      presenceMod <- randomForest(presentFormula, data = training)
      training$predProp <- predict(presenceMod)
    }
    # The data-driven threshold is the presence-score quantile above which
    # the same proportion of plots get a 'present' assignment as there were
    # plots with the species in the training data. It can be overridden via
    # the threshold argument.
    if (is.null(threshold)) {
      threshold <- quantile(training$predProp, 1 - presentProp)
    }
  }
  training <- training %>%
    mutate(presAbs = ifelse(predProp >= threshold, 1, 0))
  # Fit the importance model to plots where the species is present.
  trainingPresent <- training %>%
    filter(present == 1)
  impFormula <- as.formula(paste('y ~ ', paste0(predictors, collapse = '+')))
  if (nrow(trainingPresent) > 5) {
    # Regression forest if more than 5 plots have the species.
    impMod <- randomForest(impFormula, data = trainingPresent,
                           type = 'regression')
  } else {
    if (nrow(trainingPresent) > 1) {
      # Classification forest if 5 or fewer plots have the species.
      trainingPresent$y <- factor(as.character(trainingPresent$y))
      impMod <- randomForest(impFormula,
                             data = trainingPresent,
                             type = 'classification')
    } else {
      if (nrow(trainingPresent) == 1) {
        # Only one plot: fit a model that predicts that plot's value.
        impMod <- lm(y ~ 1, data = trainingPresent)
      } else { # no plots (groupstrap): predict 0
        impMod <- lm(0 ~ 1)
      }
    }
  }
  outMods <- list(spp = spp,
                  threshold = threshold,
                  presenceMod = presenceMod,
                  impMod = impMod,
                  predictors = predictors)
  return(outMods)
}

# Fit species models with data-driven thresholds.
sppMods <- lapply(speciesLabs, fit_spp_models,
                  td = trainingData,
                  pcScoresIDX = pcScoresIDX,
                  bespokePredictors = impBespokePredictors,
                  stan = FALSE)
names(sppMods) <- speciesLabs

# Predict species importance for a pixel frame from one fitted sppMods entry.
# Returns a one-column data.frame named after the species.
predict_spp_importance <- function(sppMod, pixels) {
  pixels$present <- 0
  threshold <- sppMod[['threshold']]
  presenceMod <- sppMod[['presenceMod']]
  impMod <- sppMod[['impMod']]
  # Predict presence/absence.
  if (inherits(presenceMod, 'stanreg')) {
    postPropSims <- posterior_predict(presenceMod, newdata = pixels)
    pixels$propPred <- colMeans(postPropSims)
  } else {
    pixels$propPred <- predict(presenceMod, newdata = pixels, type = 'response')
  }
  # Use >= so the boundary case matches the presAbs rule applied when the
  # threshold was derived during fitting (the old code used >, which
  # disagreed with fitting exactly at the threshold).
  pixelPreds <- pixels %>%
    mutate(presAbsPred = ifelse(propPred >= threshold, 1, 0)) %>%
    mutate(rawImp = as.numeric(as.character(predict(impMod, newdata = .)))) %>%
    mutate(sppImp = ifelse(presAbsPred == 1, rawImp, 0)) %>%
    select(presAbsPred, sppImp)
  out <- pixelPreds %>%
    select(sppImp)
  names(out) <- sppMod[[1]]
  return(out)
}

# Training-data predictions for testing. Name the FUN argument explicitly so
# it cannot be swallowed by mclapply's '...' when mixed with named args.
pixels <- trainingData
sppPreds <- bind_cols(mclapply(sppMods,
                               FUN = predict_spp_importance,
                               pixels = pixels))
```
```{r importanceDiagnostics}
# One row per plot x species: predicted vs. observed importance.
# bind_rows(lapply(...)) avoids the grow-by-rbind loop of the original.
impDF <- bind_rows(lapply(seq_along(sppMods), function(i) {
  sppName <- sppMods[[i]][[1]]
  tmpPred <- predict_spp_importance(sppMods[[i]], pixels = fullTrainingData)
  tempDF <- data.frame(predicted = unlist(tmpPred))
  tempDF$meta_stand_id <- fullTrainingData$meta_stand_id
  tempDF$plot_id <- fullTrainingData$plot_id
  tempDF$meta_megaStrata <- fullTrainingData$meta_megaStrata
  tempDF$observed <- fullTrainingData[, sppName]
  # Species columns are named '<prefix>_<stID>'; map the stID to a common
  # name via the st_sp2 lookup table.
  stID <- unlist(strsplit(sppName, split = '_'))[2]
  tempDF$species <- st_sp2$common[match(stID, st_sp2$stID)]
  tempDF$sppID <- sppName
  # tempDF$SD <- apply(tmpPred$individual, 1, function(x) {sd(x)})
  tempDF
}))

impPlot <- ggplot(impDF, aes(x = observed, y = predicted, color = meta_megaStrata)) +
  geom_point(alpha = 0.5) +
  facet_wrap( ~ species) +
  theme_bw() +
  xlim(0, 1) + ylim(0, 1) +
  xlab("Observed species importance") +
  ylab("Predicted species importance") +
  geom_abline(slope = 1, intercept = 0, color = 'red')
print(impPlot)

# # Species importance score precision
# impSDPlot <- ggplot(impDF, aes(x = species, y = SD)) + geom_boxplot() +
#   theme_bw() + xlab('diameter class') + ylab('standard deviation')
# print(impSDPlot)
```
```{r impRatios}
# Model-assisted ratio adjustments for species importance (meanObs / meanPred)
# over non-zero plots. The strata table's two branches differed only in how
# the stratum label is attached, so build the filtered base once and branch.
impBase <- impDF %>%
  filter(as.character(plot_id) %nin% zeroPlots)
if(is.null(strataVar)) {
  # no stratification variable: treat the whole population as one stratum
  impStrataBase <- impBase %>%
    mutate(strataVar = 'population')
} else {
  # attach the stratum label from the strata lookup table
  impStrataBase <- impBase %>%
    left_join(strataTab)
}
strataImpRatios <- impStrataBase %>%
  group_by(strataVar, sppID) %>%
  summarize(meanPred = mean(predicted),
            meanObs = mean(observed)) %>%
  mutate(ratio = meanObs / meanPred) %>%
  # a 0/0 species (never predicted nor observed) gets a neutral ratio of 1
  mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  select(strataVar, sppID, meanPred, meanObs, rat = ratio)
# model-assisted population level species importance ratios
populationImpRatios <- impBase %>%
  group_by(sppID) %>%
  summarize(meanPred = mean(predicted),
            meanObs = mean(observed)) %>%
  mutate(ratio = meanObs / meanPred) %>%
  mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  select(sppID, meanPred, meanObs, rat = ratio)
# model-assisted stand-level species importance ratios, restricted to stands
# that already have stocking ratios
# NOTE(review): unlike the other two tables, NA ratios are NOT replaced with 1
# here — confirm that is intentional
standImpRatios <- impBase %>%
  filter(meta_stand_id %in% unique(standRatios$meta_stand_id)) %>%
  group_by(meta_stand_id, sppID) %>%
  summarize(meanPred = mean(predicted),
            meanObs = mean(observed)) %>%
  mutate(ratio = meanObs / meanPred) %>%
  select(stand = meta_stand_id, sppID, meanPred,
         meanObs, rat = ratio)
```
```{r computeEmpiricalMatrix}
# read in trees, set up predictors
# attach plot-level training predictors to every cruised tree; diameters are
# binned into 1-inch classes for the per-class species models below
wrapTrees <- cruiseDataNoZero %>%
  mutate(diameterClass = round(diameter)) %>%
  left_join(trainingData %>% mutate(plot_id = as.character(plot_id)))
# species-importance and diameter-distribution columns, excluding metadata
sppNames <- names(trainingData)[grep('spp', names(trainingData))]
sppNames <- sppNames[!grepl('meta', sppNames)]
diamNames <- names(trainingData)[grep('diam_', names(trainingData))]
diamNames <- diamNames[!grepl('meta', diamNames)]
# predictors for the per-diameter-class multinomial species model:
# stocking (baph, tph) plus all importance and distro scores
wrapPreds <- c('baph', 'tph', sppNames, diamNames)
predsSide <- paste0(wrapPreds, collapse = '+')
wrapFormula <- as.formula(paste0('common ~ ', predsSide))
# function that will build model to predict the species of a tree of a given
# diameter as a function of baph, tph, sppImp scores, and diam scores.
# Returns either a fitted multinom model or (fallback) a one-row data frame
# of empirical species proportions for that diameter class.
wrap_by_diam <- function(diam) {
  print(diam)
  # subset of trees where diameter = diam; each record is replicated by its
  # rounded expansion factor so stems are weighted by what they represent
  diamTraining <- wrapTrees %>%
    filter(diameterClass == diam) %>%
    mutate(roundedExpansion = round(expansion))
  diamTraining <- diamTraining[rep(row.names(diamTraining), diamTraining$roundedExpansion), ]
  distinctRows <- diamTraining %>%
    distinct()
  # build a model to predict species as a function of stocking and species.
  if(length(unique(diamTraining$common)) > 2 & nrow(distinctRows) > 5) {
    # fit multinomial model to predict probability of a given tree belonging
    # to each species (use predict(mod, newdata, 'probs') to get probs).
    # BUG FIX: multinom() is now wrapped in try() — previously the
    # 'try-error' class check below was dead code because an error would
    # have propagated before the check ever ran
    mod <- try(nnet::multinom(wrapFormula,
                              data = diamTraining, MaxNWts = 10000,
                              maxit = 1000))
    if('try-error' %in% class(mod)) {
      stop(paste0('wrapMod failed to build for diameter class ', diam))
    }
    return(mod)
  } else { # too few species or too few distinct records in this class:
    # fall back to empirical species proportions from the training treelist
    sppTab <- table(diamTraining$common) / nrow(diamTraining)
    sppFrame <- data.frame(t(matrix(sppTab)))
    names(sppFrame) <- dimnames(sppTab)[[1]]
    return(sppFrame)
  }
}
# list of unique diameters
uniqueDiams <- wrapTrees %>%
  arrange(diameterClass) %>%
  select(diameterClass) %>%
  distinct() %>%
  .$diameterClass
# list of unique species names
uniqueCommons <- wrapTrees %>%
  select(common) %>%
  distinct() %>%
  arrange(common) %>%
  .$common
# build models for all diameter classes in parallel; a worker that errors
# yields a 'try-error' element in the result list
wrapMods <- mclapply(uniqueDiams, wrap_by_diam, mc.cores = detectCores() - 2)
names(wrapMods) <- paste('diam', uniqueDiams, sep = '_')
# check to see if wrapMods will work: collect all class strings at once
# instead of growing a vector inside a 1:length() loop
classCheck <- unlist(lapply(wrapMods, class))
if(any(classCheck == 'try-error')) {
  rm(list = 'wrapMods')
  stop('wrapMods failed to write out properly')
}
```
```{r sppDiameterLimits}
# Allowable diameter ranges per species: each species may extend 2 inches
# beyond its cruised range, clamped to the global cruised min/max.
# global diameter limits by species
populationCruiseLimits <- cruiseDataNoZero %>%
  mutate(globalMinD = round(min(diameter)),
         globalMaxD = round(max(diameter))) %>%
  group_by(common) %>%
  summarize(minD = round(min(diameter) - 2),
            maxD = round(max(diameter) + 2),
            globalMinD = unique(globalMinD),
            globalMaxD = unique(globalMaxD)) %>%
  mutate(minAllowD = ifelse(minD < globalMinD, globalMinD, minD),
         maxAllowD = ifelse(maxD > globalMaxD, globalMaxD, maxD)) %>%
  select(common, minAllowD, maxAllowD)
# strata-level diameter limits for each species; the original if/else branches
# were identical except for adding a constant 'population' label, so build the
# shared base once and branch only on that label
strataLimitBase <- cruiseDataNoZero %>%
  left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  mutate(globalMinD = round(min(diameter)),
         globalMaxD = round(max(diameter)))
if(is.null(strataVar)) {
  # no stratification variable: one 'population' stratum
  strataLimitBase <- strataLimitBase %>%
    mutate(strataVar = 'population')
}
strataCruiseLimits <- strataLimitBase %>%
  group_by(strataVar, common) %>%
  summarize(minD = round(min(diameter) - 2),
            maxD = round(max(diameter) + 2),
            globalMinD = unique(globalMinD),
            globalMaxD = unique(globalMaxD)) %>%
  mutate(minAllowD = ifelse(minD < globalMinD, globalMinD, minD),
         maxAllowD = ifelse(maxD > globalMaxD, globalMaxD, maxD)) %>%
  select(strataVar, common, minAllowD, maxAllowD)
```
- ---------------------
- Core Groupstrap Results
- ---------------------
```{r groupStrap, results = 'hide'}
##TODO: add a function here to iterate over each set of
#values in metadata and try predicting bapa and tpa
# Stand-level observed means and SEs for BAPH/TPH alongside the full-model
# predictions; only stands with at least `groupSize` plots are kept for the
# groupstrap comparison
bootPolygons <- trainingData %>%
  dplyr::group_by(meta_stand_id) %>%
  dplyr::summarize(nPlots = length(baph),
                   observedBAPH = mean(baph),
                   seObservedBAPH = sd(baph) / sqrt(length(meta_stand_id)),
                   predBAPH = mean(baphPred),
                   observedTPH = mean(tph),
                   seObservedTPH = sd(tph) / sqrt(length(meta_stand_id)),
                   predTPH = mean(tphPred),
                   # 90% CI
                   tScore = qt(1 - (0.1 / 2), nPlots - 1)) %>%
  dplyr::ungroup() %>%
  dplyr::filter(nPlots >= groupSize) # change as needed
# Leave-one-stand-out ("groupstrap") prediction. Refits the stocking models
# with the target stand's plots held out, then predicts back onto that stand.
# Returns a list of three data frames:
#   [[1]] stocking (mean TPH/BAPH), [[2]] diameter distro, [[3]] species imp.
predict_for_one_group <- function(polygon) {
  polygonIdx <- which(bootPolygons$meta_stand_id == polygon)
  catMsg(paste('Polygon', polygonIdx, ":", polygon))
  #subset training data for reduced model fitting
  tdSub <- trainingData %>%
    filter(meta_stand_id != polygon)
  #subset training data for prediction to target polygon
  tdApply <- trainingData %>%
    filter(meta_stand_id == polygon)
  # refit the pooled default stocking models on the reduced data;
  # predictions are back-transformed from the log(y + 1) scale
  baphGS <- baphPooledDefault
  tphGS <- tphPooledDefault
  baphModelSub <- update(baphGS, data = tdSub)
  baPredSub <- exp(predict(baphModelSub,
                           newdata = tdApply)) - 1
  tphModelSub <- update(tphGS, data = tdSub)
  tphPredSub <- exp(predict(tphModelSub,
                            newdata = tdApply)) - 1
  #
  # # make distro and imp predictions using full models because
  # # RF models take too long to refit repeatedly
  # NOTE(review): distroBespokePredictors is hard-coded to NULL here, so the
  # else branches below are dead within this function — confirm whether the
  # outer-scope bespoke predictors should be plumbed in instead
  distroBespokePredictors <- NULL
  if(is.null(distroBespokePredictors)) {
    distroGroupSub <- lapply(1:length(distroVarsIDX), genericModelBuild,
                             ys = tdSub[, distroVarsIDX],
                             predVars = tdSub[, pcScoresIDX])
  } else {
    distroGroupSub <- lapply(1:length(distroVarsIDX), genericModelBuild,
                             ys = tdSub[, distroVarsIDX],
                             predVars = tdSub[, distroBespokePredictors])
  }
  names(distroGroupSub) <- names(tdSub)[distroVarsIDX]
  # predict every diameter-class model on a pixel set; one column per class
  predict_distro_pixels <- function(pixelsData) {
    runOne <- function(J) {
      if(is.null(distroBespokePredictors)) {
        newdata <- pixelsData[, pcScoresIDX]
      } else {
        newdata <- pixelsData[, distroBespokePredictors]
      }
      return(predict(distroGroupSub[[J]], newdata = newdata))
    }
    temp <- data.frame(do.call(cbind, lapply(X = 1:length(distroGroupSub),
                                             FUN = runOne)))
    names(temp) <- names(distroGroupSub)
    return(temp)
  }
  distroFrame <- predict_distro_pixels(tdApply)
  distroOut <- as.data.frame(t(colMeans(distroFrame)))
  # refit species models on the reduced data (stan = F for speed)
  # NOTE(review): 'pca9' is excluded from the importance predictors — confirm
  # this matches the full-model importance fit elsewhere in the report
  impPredictors <- c(names(trainingData)[pcScoresIDX])
  impPredictors <- impPredictors[impPredictors != 'pca9']
  sppModsSub <- lapply(speciesLabs, fit_spp_models,
                       td = tdSub,
                       pcScoresIDX = pcScoresIDX,
                       bespokePredictors = impPredictors,
                       stan = F)
  impPredsSub <- bind_cols(mclapply(sppModsSub,
                                    pixels = tdApply,
                                    predict_spp_importance))
  # normalize stand-mean importance to sum to 1, then zero negatives
  # NOTE(review): zeroing AFTER normalizing means the row may no longer sum
  # to exactly 1 — confirm intentional
  impOut <- as.data.frame(t(colMeans(impPredsSub))) / sum(colMeans(impPredsSub))
  impOut[impOut < 0] <- 0
  ##We want to make baph and tph figures:
  #stand-wise full model
  #stand-wise reduced model
  #Diameter distribution by stand
  #Species importance by stand
  stockOut <- data.frame("meta_stand_id" = polygon,
                         "groupModelPredTPH" = mean(tphPredSub),
                         "groupModelPredBAPH" = mean(baPredSub))
  #
  output <- list(stockOut, distroOut, impOut)
  return(output)
}
#
# # Perform groupstrap on all polygons:
bootResults <- lapply(bootPolygons$meta_stand_id,
                      predict_for_one_group)
#
# pull component `place` out of the i-th polygon's result list
# (1 = stocking, 2 = diameter distro, 3 = species importance);
# [[place]] replaces the roundabout original [place][[1]]
extract_from_list <- function(i, bootResults, place) {
  bootResults[[i]][[place]]
}
#
# # Extract the tph and ba predictions:
bootPolygonsResults <- bootPolygons %>%
  bind_cols(do.call(rbind, lapply(seq_along(bootResults),
                                  extract_from_list,
                                  bootResults = bootResults,
                                  place = 1)))
#
# # Extract the diameter distribution predictions:
distroPredictions <- bind_rows(lapply(seq_along(bootResults),
                                      extract_from_list,
                                      bootResults = bootResults,
                                      place = 2)) %>%
  mutate(meta_stand_id = bootPolygons$meta_stand_id,
         origin = 'model')
#
# # Extract the species importance predictions
impPredictions <- bind_rows(lapply(seq_along(bootResults),
                                   extract_from_list,
                                   bootResults = bootResults,
                                   place = 3)) %>%
  mutate(meta_stand_id = bootPolygons$meta_stand_id,
         origin = 'model')
```
- ### Basal Area per Hectare
```{r groupBAPH}
# Stand-level BAPH comparison: observed plot-based mean with a 90% CI vs the
# full-model and groupstrap predictions
baphTable <- bootPolygonsResults %>%
  dplyr::select(meta_stand_id, nPlots, tScore, contains("BAPH")) %>%
  dplyr::mutate(lowerCI = observedBAPH - tScore * seObservedBAPH,
                upperCI = observedBAPH + tScore * seObservedBAPH) %>%
  dplyr::select(meta_stand_id, nPlots, contains("CI"), contains("BAPH"))
#
pander(baphTable,
       split.table = Inf,
       justify = 'left',
       digits = 2)
#
# long format: one row per stand x prediction source
baphLong <- baphTable %>%
  tidyr::gather(key = fit, value = predicted,
                predBAPH, groupModelPredBAPH)
#
# shared axis limit with a little headroom (max over both columns)
baphMax <- baphLong %>%
  dplyr::select(upperCI, predicted) %>%
  max() + 5
#
baphColors <- c("darkgreen", "darkseagreen")
densityLabels <- c("Full model pixel prediction",
                   "Groupstrap model pixel prediction")
#
# observed vs predicted faceted by source; dashed lines mark +/-10% of the
# 1:1 line, horizontal bars show the observed 90% CI sized by plot count
baphFaceted <- ggplot(data = baphLong,
                      aes(x = observedBAPH, y = predicted, color = fit)) +
  facet_wrap(~ fit) +
  xlab('ST plot prediction') +
  ylab('ST model prediction') +
  ggtitle('Group bootstrap of basal area predictions to approximate model based performance') +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  geom_abline(intercept = 0, slope = 1.1, linetype = 2) +
  geom_abline(intercept = 0, slope = 0.9, linetype = 2) +
  geom_errorbarh(aes(xmin = lowerCI, xmax = upperCI, size = nPlots)) +
  xlim(0, baphMax) +
  ylim(0, baphMax) +
  theme_bw() +
  scale_color_manual(values = baphColors, labels = densityLabels)
print(baphFaceted)
```
- ### Basal Area per Acre
```{r clientBAPA}
#
# convert all per-hectare columns to per-acre; ID and count columns excluded
bapaTable <- baphTable %>%
  mutate_each(., funs(convert_baph_to_bapa), -meta_stand_id, -nPlots)
# vectorized rename (replaces the original for/match loop over names)
names(bapaTable) <- gsub("BAPH", "BAPA", names(bapaTable))
bapaMax <- bapaTable %>%
  select(upperCI, contains("BAPA")) %>%
  max() + 5
# shared ggplot layers for the client-facing observed-vs-predicted plots
clientLayers <- list(geom_point(),
                     geom_abline(intercept = 0, slope = 1),
                     geom_errorbarh(aes(xmin = lowerCI, xmax = upperCI)),
                     theme_bw(),
                     theme(text = element_text(size = 18),
                           title = element_text(size = 18)),
                     geom_abline(intercept = 0, slope = 1.1, linetype = 2),
                     geom_abline(intercept = 0, slope = 0.9, linetype = 2)
)
# reduced (groupstrap) model vs observed
ggplot(bapaTable, aes(x = observedBAPA, y = groupModelPredBAPA)) +
  xlab('Estimate of basal area (ft2/ac) from plot data') +
  ylab('ST reduced model prediction of basal area (ft2/ac)') +
  xlim(0, bapaMax) +
  ylim(0, bapaMax) +
  clientLayers
# full model vs observed
ggplot(bapaTable, aes(x = observedBAPA, y = predBAPA)) +
  xlab('Estimate of basal area (ft2/ac) from plot data') +
  ylab('ST model prediction of basal area (ft2/ac)') +
  xlim(0, bapaMax) +
  ylim(0, bapaMax) +
  clientLayers
```
- ### Trees per Hectare
```{r groupTPH}
#
# Stand-level TPH comparison table with 90% CI, mirroring the BAPH table
tphTable <- bootPolygonsResults %>%
  select(meta_stand_id, nPlots, tScore, contains("TPH")) %>%
  mutate(lowerCI = observedTPH - tScore * seObservedTPH,
         upperCI = observedTPH + tScore * seObservedTPH) %>%
  select(meta_stand_id, nPlots, contains("CI"), contains("TPH"))
pander(tphTable, split.tables = Inf,
       split.cells = Inf,
       justify = 'left',
       digits = 0)
tphLong <- tphTable %>%
  gather(key = fit, value = predicted,
         predTPH, groupModelPredTPH)
tphMax <- tphLong %>%
  select(upperCI, predicted) %>%
  max() + 5
tphColors <- c("darkblue", "deepskyblue")
# BUG FIX: theme_bw() is a *complete* theme and previously came after the
# theme(strip.background/strip.text.x) call, wiping out the blank-strip
# customization; theme_bw() must be applied first, then the overrides
tphGroupFacetPlot <- ggplot(data = tphLong,
                            aes(x = observedTPH, y = predicted, color = fit)) +
  xlab('ST plot prediction') +
  ylab('ST model prediction') +
  geom_point() +
  geom_abline(intercept = 0, slope = 1) +
  geom_abline(intercept = 0, slope = 1.1, linetype = 2) +
  geom_abline(intercept = 0, slope = 0.9, linetype = 2) +
  geom_errorbarh(aes(xmin = lowerCI, xmax = upperCI, size = nPlots)) +
  scale_color_manual("", values = tphColors, labels = densityLabels) +
  ggtitle('Group bootstrap of trees per hectare predictions to approximate model-based performance') +
  xlim(0, tphMax) +
  ylim(0, tphMax) +
  theme_bw() +
  theme(strip.background = element_blank(), strip.text.x = element_blank()) +
  facet_wrap( ~ fit)
print(tphGroupFacetPlot)
```
- ### Trees per Acre
```{r clientTPA}
#
# convert per-hectare columns to per-acre
# NOTE(review): the helper is named convert_acres_to_hectares but is applied
# to per-hectare values to yield per-acre — confirm the conversion direction
tpaTable <- tphTable %>%
  mutate_each(., funs(convert_acres_to_hectares), -meta_stand_id, -nPlots)
# vectorized rename (replaces the original for/match loop over names)
names(tpaTable) <- gsub("TPH", "TPA", names(tpaTable))
tpaMax <- tpaTable %>%
  select(upperCI, contains("TPA")) %>%
  max() + 5
# reduced (groupstrap) model vs observed
tpaTreelistClientPlot <- ggplot(tpaTable, aes(x = observedTPA,
                                              y = groupModelPredTPA)) +
  xlab('Estimate of trees per acre from plot data') +
  ylab('ST reduced model prediction of trees per acre') +
  xlim(0, tpaMax) + ylim(0, tpaMax) + clientLayers
# full model vs observed
tpaFullModelClientPlot <- ggplot(tpaTable, aes(x = observedTPA,
                                               y = predTPA)) +
  xlab('Estimate of trees per acre from plot data') +
  ylab('ST model prediction of trees per acre') +
  xlim(0, tpaMax) + ylim(0, tpaMax) + clientLayers
print(tpaTreelistClientPlot)
print(tpaFullModelClientPlot)
```
- ### Diameter Distribution
```{r groupDistro}
# stands where every plot has zero trees; excluded from the comparison plots
zeroTreeStands <- trainingData %>%
  group_by(meta_stand_id) %>%
  summarize(nPlots = n_distinct(plot_id),
            nZeroes = length(unique(plot_id[tph == 0]))) %>%
  filter(nPlots == nZeroes) %>%
  .$meta_stand_id
# stand-mean observed diameter distributions stacked with the groupstrap
# predictions, reshaped long (one row per stand x diameter class x origin)
fullDist <- trainingData %>%
  dplyr::select(meta_stand_id, contains('diam')) %>%
  group_by(meta_stand_id) %>%
  dplyr::summarise_each(funs(mean)) %>%
  ungroup() %>%
  filter(meta_stand_id %in% bootPolygons$meta_stand_id) %>%
  mutate(origin = 'trainingData') %>%
  bind_rows(distroPredictions) %>%
  gather(key = diamClass, value = value, -origin, -meta_stand_id) %>%
  # strip the 'diam_' prefix so the class is a plottable number
  mutate(diamClass = as.numeric(gsub('diam_', '', diamClass))) %>%
  filter(value > 0) %>%
  filter(meta_stand_id %nin% zeroTreeStands)
#
# observed vs modeled distro per stand, distinguished by line type
ggplot(fullDist, aes(x = diamClass, y = value, linetype = origin)) +
  geom_point(alpha = 0.1) + geom_line(alpha = 0.9) +
  facet_wrap( ~ meta_stand_id) + theme_bw() +
  ylab("Relative presence in distribution")
```
- ### Species Importance Value
```{r groupImp}
#
# observed stand-mean species importance (non-zero plots only) stacked with
# the groupstrap predictions, long by species
bootSpp <- trainingData %>%
  dplyr::select(meta_stand_id,
                one_of(names(trainingData)[impVarsIDX]),
                tph) %>%
  group_by(meta_stand_id) %>%
  filter(tph > 0 ) %>%
  dplyr::summarise_each(funs(mean)) %>%
  select(-tph) %>%
  ungroup() %>%
  filter(meta_stand_id %in% bootPolygons$meta_stand_id) %>%
  mutate(origin = 'trainingData') %>%
  bind_rows(impPredictions) %>%
  gather(key = spp, value = value, -origin, -meta_stand_id) %>%
  # 'spp_<id>' column names become numeric species IDs, then common names
  mutate(spp = as.numeric(gsub('spp_', '', spp)),
         common = get_common(spp)) %>%
  filter(meta_stand_id %nin% zeroTreeStands)
# 12-color qualitative palette, interpolated to one color per species
pal12 <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
           "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A",
           "#FFFF99", "#B15928")
#Get correct number of distinct, non-annoying colors:
colorCount <- length(unique(bootSpp$common))
useColors <- colorRampPalette(pal12)(colorCount)
# stacked species composition, observed vs modeled, per stand
impGroupBarPlot <- ggplot(bootSpp, aes(x = origin, y = value)) +
  geom_bar(stat = "identity", aes(fill = common)) +
  scale_fill_manual(values = useColors,
                    guide = guide_legend(nrow = 18)) +
  facet_wrap(~ meta_stand_id) + theme_bw() +
  ylab('Species Importance Value')
# per-species importance profiles, observed vs modeled, per stand
impGroupLinePlot <- ggplot(bootSpp, aes(x = common, y = value)) +
  geom_line(aes(color = origin, group = origin)) +
  ylab("Species importance") +
  facet_wrap(~ meta_stand_id) + theme_bw() +
  scale_color_manual(values = c("black", "skyblue2")) +
  theme(axis.text.x=element_text(angle=45, hjust = 1))
print(impGroupBarPlot)
print(impGroupLinePlot)
```
- ------------
- Memory Flush
- ------------
```{r saveCore}
# save objects
# core model set consumed downstream by the prediction pipeline
save(tphPooled, baphPooled, distroSet,
     sppMods, predict_spp_importance,
     baphPrecision, tphPrecision,
     impBespokePredictors, distroBespokePredictors,
     transform_the_predictors,
     transform_the_baph_prediction,
     transform_the_tph_prediction,
     wrapMods,
     strataVar,
     populationCruiseLimits,
     strataCruiseLimits, stratTab,
     baphCoeffTable, tphCoeffTable,
     baphPredFrame, tphPredFrame,
     file = file.path(modelDir, 'core_models.Rda'))
# model-assisted ratio tables at stand / strata / population level
save(standRatios,
     strataRatios,
     populationRatios,
     strataVar,
     strataDistroRatios,
     populationDistroRatios,
     standDistroRatios,
     strataImpRatios,
     populationImpRatios,
     standImpRatios,
     maCIs,
     file = file.path(modelDir, 'standRatios.Rda'))
# report tables; row.names spelled out as FALSE (F is reassignable)
write.csv(baphTable, file.path(reportDir, "baphTable.csv"), row.names = FALSE)
write.csv(tphTable, file.path(reportDir, "tphTable.csv"), row.names = FALSE)
write.csv(bapaTable, file.path(reportDir, "bapaTable.csv"), row.names = FALSE)
write.csv(tpaTable, file.path(reportDir, "tpaTable.csv"), row.names = FALSE)
```
- --------------
- Total Height Model
- --------------
```{r heightPrep, eval=eval_height}
# median basal area per acre (1 m2/ha = 4.356 ft2/ac)
medianBAPA <- median(trainingData$baph) * 4.356
# convert total/stopper heights to feet
cruiseDataNoZero$hm <- convert_meters_to_feet(cruiseDataNoZero$hm)
cruiseDataNoZero$hp <- convert_meters_to_feet(cruiseDataNoZero$hp)
cruiseDataNoZero$hs <- convert_meters_to_feet(cruiseDataNoZero$hs)
# property centroid used to glean nearby FIA tree records
lat <- mean(parse_lat_lon(cruiseData$loc, coord = 'lat'))
lon <- mean(parse_lat_lon(cruiseData$loc, coord = 'lon'))
radius <- 150
gleanedFile <- file.path(modelDir, 'gleanedList.Rda')
if (file.exists(gleanedFile)) {
  load(gleanedFile)
} else {
  # widen the glean radius (capped at 250; units per glean_tree_records —
  # confirm) until every cruised species has at least 3 records
  minSpp <- 0
  # scalar loop condition: && (short-circuit) instead of elementwise &
  while (radius < 250 && minSpp < 3) {
    gleanedList <- glean_tree_records(lat, lon, radius)
    gleanSummary <- gleanedList$trees %>%
      mutate(st_species = fia2st(SPCD)) %>%
      filter(st_species %in% unique(cruiseData$st_species)) %>%
      group_by(st_species) %>%
      summarize(count = length(st_species))
    minSpp <- min(gleanSummary$count)
    radius <- radius + 50
  }
  save(gleanedList, file = gleanedFile)
}
# we need to drop snags
# height-modeling dataset: cruised trees joined to plot-level stocking
cruiseDataHeight <- cruiseDataNoZero %>%
  select(plot_id, st_species, common, diameter, expansion,
         hm, tqi, start_height_1, hs, hp) %>%
  left_join(trainingData %>%
              mutate(plot_id = as.character(plot_id)) %>%
              select(plot_id, baph, tph))
# create a plottable height dataset (long by height type)
cruiseDataHtPlot <- cruiseDataHeight %>%
  select(common, diameter, hm, tqi, hs, hp) %>%
  gather(heightType, height, -common, -diameter, -tqi) %>%
  mutate(height_type = case_when(heightType == 'hm' ~ 'total',
                                 heightType == 'hs' ~ 'saw_stopper',
                                 heightType == 'hp' ~ 'pulp_stopper')) %>%
  filter(!is.na(height))
totalAndMerchHtPlot <- ggplot(data = cruiseDataHtPlot,
                              aes(x = diameter, y = height, color = height_type)) +
  geom_point(alpha = 0.4) +
  facet_wrap(~ common, labeller = labeller(common = label_wrap_gen(width = 15))) +
  theme_bw()
print(totalAndMerchHtPlot)
```
- ## FIA height models
```{r heightFIA, eval=eval_height}
# drop species that aren't in FIA database
cruiseDataFia <- cruiseDataHeight %>%
  mutate(SPCD = st2fia(st_species)) %>%
  filter(SPCD %in% unique(gleanedList$trees$SPCD))
# fit the FIA-augmented height model at the property's median stand density
htModelFIA <- freq_height(species = cruiseDataFia$st_species,
                          diameter = cruiseDataFia$diameter,
                          height = cruiseDataFia$hm,
                          expansion = cruiseDataFia$expansion,
                          bapa = medianBAPA,
                          latitude = lat,
                          longitude = lon,
                          speciesCode = 'ST',
                          gleanedData = gleanedList)
# predict total height for every cruised tree (including FIA-absent species)
cruiseDataHeight$predHeightFia <- predict(htModelFIA,
                                          newSpp = cruiseDataHeight$st_species,
                                          newDiam = cruiseDataHeight$diameter,
                                          speciesCode = 'ST')
# only plot observed-vs-predicted if at least one total height was measured
if(!all(is.na(cruiseDataHeight$hm))) {
  fiaHtPlot <- ggplot(aes(x = hm, y = predHeightFia),
                      data = cruiseDataHeight) +
    geom_point(alpha = 0.2) +
    theme_bw() +
    geom_abline() +
    facet_wrap(~common) +
    ggtitle('FIA height models')
  print(fiaHtPlot)
}
```
- ## Local height models
```{r heightLocal, eval=eval_height}
# drop species that have fewer than 5 observations
cruiseDataLocal <- cruiseDataHeight %>%
  group_by(common) %>%
  mutate(n = n()) %>%
  filter(n >= 5) %>%
  ungroup() %>%
  filter(!is.na(hm))
# if no total height observations, do not try to fit local height models
if(nrow(cruiseDataLocal) > 0) {
  # cruise-data-only height model (no FIA gleaning)
  htModelLocal <- freq_height(species = cruiseDataLocal$st_species,
                              diameter = cruiseDataLocal$diameter,
                              height = cruiseDataLocal$hm)
  # predict back onto ALL cruised trees, not just the fitting subset
  cruiseDataHeight$predHeightLocal <- predict(htModelLocal,
                                              newSpp = cruiseDataHeight$st_species,
                                              newDiam = cruiseDataHeight$diameter,
                                              speciesCode = 'ST')
  localHtPlot <- ggplot(aes(x = hm, y = predHeightLocal),
                        data = cruiseDataHeight) +
    geom_point(alpha = 0.2) +
    theme_bw() +
    geom_abline() +
    facet_wrap(~common, labeller =
                 labeller(common = label_wrap_gen(width = 15)))
  print(localHtPlot)
}
```
- ## Final height models
Diagnostics for the final height model are displayed below. The default choice is the local freq_height model. Change this if you want to use the FIA model or a hybrid FIA/local model.
```{r heightFinal, eval=eval_height}
# htModel <- htModelFIA
htModel <- htModelLocal
# make predictions to cruised trees using final model
cruiseDataHeight$predHeightFinal <- predict(htModel,
                                            newSpp = cruiseDataHeight$st_species,
                                            newDiam = cruiseDataHeight$diameter,
                                            speciesCode = 'ST')
# long table of observed (hm) vs predicted total heights.
# BUG FIX: the case_when previously tested source == 'total_height', which
# never matched the gathered column name 'hm', so every observed height was
# silently labeled NA and dropped from the color mapping
heightComp <- cruiseDataHeight %>%
  select(diameter, common, expansion,
         hm, predHeightFinal) %>%
  gather(source, totalHeight, hm, predHeightFinal) %>%
  mutate(source = case_when(source == 'hm' ~ 'observed',
                            source == 'predHeightFinal' ~ 'predicted'))
htDiamPlot <- ggplot(data = heightComp,
                     aes(x = diameter, y = totalHeight,
                         color = source)) +
  geom_point(alpha = 0.4) +
  theme_bw() +
  facet_wrap(~ common,
             labeller =
               labeller(common = label_wrap_gen(width = 15))) +
  ylab('Total height (ft)') +
  xlab('DBH (in)') +
  ggtitle('Total height model predictions vs observed values')
print(htDiamPlot)
# client version with brand colors (the original comments had the two
# labels swapped: #30363E is the dark charcoal, #347a35 the green)
clientHt <- htDiamPlot +
  scale_color_manual(values = c('#30363E', #stGrey (dark charcoal)
                                '#347a35')) #stDarkGreen
print(clientHt)
print(plot(htModel))
save(htModel, file = file.path(modelDir, 'htModel.Rda'))
```
- --------------
- Merchantable Height Model
- --------------
```{r auxProportionalSystem}
# generic proportional assignment by diameter, spp or sppGroup, and height class
# can be used for any tree-level variable
# Builds expansion-weighted proportion tables (pooled and per-stand) for a
# tree-level attribute, and returns them bundled with a predict_aux() closure
# that samples attribute values for a new treelist from those proportions.
# NOTE(review): the grouping column `group` only exists if at least one of
# spp / sppGroup / htClass / groupVars is set — confirm callers always do so.
build_aux_model <- function(trees, # modeling dataset
                            attribute, # name of attribute to be modeled
                            stand = F, # build stand-specific tables too?
                            spp = F, # group by species?
                            sppGroup = NULL, # group by a species group from st_sp2 instead of common name?
                            htClass = F, # group by height class?
                            groupVars = NULL # other grouping variable from metadata?
                            ) {
  # identify stands that will get stand-specific predictions
  # (stands with at least 5 non-zero plots)
  if(stand) {
    modStands <- trainingData %>%
      filter(plot_id %nin% zeroPlots) %>%
      select(meta_stand_id, plot_id) %>%
      group_by(meta_stand_id) %>%
      summarize(nPlots = n()) %>%
      filter(nPlots >= 5) %>%
      .$meta_stand_id
  } else {
    modStands <- c()
  }
  # attach plot-level training metadata to the trees
  dat <- trees %>%
    left_join(trainingData %>% mutate(plot_id = as.character(plot_id)))
  # define the group that each tree is in
  if(spp == T) {
    dat$group <- dat$common
  }
  if(!is.null(sppGroup)) {
    if(sppGroup %nin% c('class', 'group', 'genus')) {
      stop('sppGroup must be one of these: class, group, or genus')
    }
    dat$group <- st_sp2[[sppGroup]][match(dat$common, st_sp2$common)]
  }
  if(htClass == T) {
    # predict height if tree does not have total height
    dat$finalPredHeight <- predict(htModel,
                                   newSpp = dat$st_species,
                                   newDiam = dat$diameter,
                                   speciesCode = 'ST')
    # height for height class is either measured height, or predicted
    dat$finalPredHeight[!is.na(dat$total_height)] <- dat$total_height[!is.na(dat$total_height)]
    # maximum observed height rounded up to nearest 10
    maxHt <- 10 * ceiling(max(dat$finalPredHeight, na.rm = T) / 10)
    # 10 ft height classes, labeled by their midpoints
    dat$htClass <- cut(dat$finalPredHeight,
                       breaks = seq(0, maxHt, by = 10),
                       labels = seq(5, maxHt - 5, by = 10))
    # tack height class onto existing groupings
    dat$group <- paste(dat$group, dat$htClass, sep = '_')
  }
  if(!is.null(groupVars)) {
    # fold any extra metadata grouping variables into the group key
    dat <- dat %>%
      mutate_at(vars(one_of(groupVars)), funs(as.character)) %>%
      unite(group, one_of('group', groupVars), sep = '_')
  }
  # proportions in each group x diameter combination (expansion-weighted)
  tableVars <- c('group', 'diameter')
  propTab <- dat %>%
    select(one_of(tableVars), expansion,
           attr = one_of(attribute)) %>%
    mutate(diameter = round(diameter)) %>%
    group_by_at(vars(one_of(tableVars), attr)) %>%
    summarize(n = sum(expansion)) %>%
    mutate(prop = n / sum(n)) %>%
    select(-n) %>%
    ungroup()
  # proportions in each group x diameter x stand combination, kept only for
  # stands with enough plots (modStands)
  standGroupVars <- c('meta_stand_id', 'group', 'diameter')
  propTabStand <- dat %>%
    select(one_of(standGroupVars), expansion,
           attr = one_of(attribute)) %>%
    mutate(diameter = round(diameter)) %>%
    group_by_at(vars(one_of(standGroupVars), attr)) %>%
    summarize(n = sum(expansion)) %>%
    mutate(prop = n / sum(n)) %>%
    select(-n) %>%
    ungroup() %>%
    filter(meta_stand_id %in% modStands)
  # Closure returned to callers: samples attribute values for treeList rows
  # from the proportion tables stored in `model` (the list built below).
  predict_aux <- function(treeList, model) {
    propTab <- model$propTab
    propTabStand <- model$propTabStand
    attribute <- model$attribute
    spp <- model$spp
    sppGroup <- model$sppGroup
    groupVars <- model$groupVars
    htClass <- model$htClass
    # rebuild the same group key on the prediction treelist; `rec` preserves
    # the original row order for the final re-join
    temp <- treeList %>%
      mutate(diameter = round(diameter)) %>%
      mutate(rec = row_number())
    if(spp == T) {
      temp$group <- temp$common
    }
    if(!is.null(sppGroup)) {
      temp$group <- st_sp2[[sppGroup]][match(temp$common, st_sp2$common)]
    }
    if(htClass == T) {
      maxHt <- 10 * ceiling(max(temp$total_height, na.rm = T) / 10)
      temp$htClass <- cut(temp$total_height,
                          breaks = seq(0, maxHt, by = 10),
                          labels = seq(5, maxHt - 5, by = 10))
      temp$group <- paste(temp$group, temp$htClass, sep = '_')
    }
    if(!is.null(groupVars)) {
      temp <- temp %>%
        mutate_at(vars(one_of(groupVars)), funs(as.character)) %>%
        unite(group, one_of('group', groupVars), sep = '_')
    }
    # unique diameter x group combinations
    combos <- temp %>%
      select(group, diameter) %>%
      distinct() %>%
      arrange(group, diameter)
    # sample attr values for all trees in one group x diameter cell,
    # preferring stand-level proportions and falling back to pooled ones
    predict_by_group <- function(grp, diam) {
      subTemp <- temp %>%
        filter(round(diameter) == diam,
               group == grp)
      # grab stand-level proportions if stand is in table
      # NOTE(review): unique(subTemp$meta_stand_id) can have length > 1 if
      # the treelist spans stands, which would break this scalar if() —
      # confirm predict_aux is always called one stand at a time
      if(unique(subTemp$meta_stand_id) %in% unique(propTabStand$meta_stand_id)) {
        probs <- propTabStand %>%
          ungroup() %>%
          filter(group == grp, meta_stand_id == unique(treeList$meta_stand_id)) %>%
          mutate(diamDist = abs(diameter - diam)) %>%
          filter(diamDist == min(diamDist)) %>%
          select(-diamDist)
        # stand table had nothing for this group: fall back to pooled table
        if(nrow(probs) < 1) {
          probs <- propTab %>%
            ungroup() %>%
            filter(group == grp) %>%
            mutate(diamDist = abs(diameter - diam)) %>%
            filter(diamDist == min(diamDist)) %>%
            select(-diamDist)
        }
        # still nothing and groups carry a height class: decompose the key
        # and match on nearest diameter then nearest height class
        # NOTE(review): `spp` here shadows the spp flag unpacked from the
        # model above — harmless at this point but worth renaming
        if(nrow(probs) < 1 & htClass) {
          decomposed <- subTemp %>%
            mutate(htClass = as.numeric(strsplit(grp, '_')[[1]][2]),
                   common = strsplit(grp, '_')[[1]][1])
          spp <- unique(decomposed$common)
          ht <- unique(decomposed$htClass)
          probs <- propTab %>%
            ungroup() %>%
            separate(group, c('common', 'htClass'), sep = '_') %>%
            mutate(htClass = as.numeric(htClass)) %>%
            filter(common == spp) %>%
            mutate(diamDist = abs(diameter - diam),
                   htDist = abs(htClass - ht)) %>%
            filter(diamDist == min(diamDist)) %>%
            filter(htDist == min(htDist)) %>%
            select(-diamDist, -htDist)
        }
      } else { # otherwise used pooled proportions
        probs <- propTab %>%
          ungroup() %>%
          filter(group == grp) %>%
          mutate(diamDist = abs(diameter - diam)) %>%
          filter(diamDist == min(diamDist)) %>%
          select(-diamDist)
      }
      # several candidate values: sample by proportion; exactly one: assign it
      # (zero candidates leaves attr unset for these rows)
      if(nrow(probs) > 1) {
        subTemp$attr <- sample(probs$attr, size = nrow(subTemp),
                               prob = probs$prop, replace = T)
      }
      if(nrow(probs) == 1) {
        subTemp$attr <- probs$attr
      }
      return(subTemp)
    }
    # run every group x diameter combination
    predict_all <- function(i) {
      run <- combos[i, ]
      preds <- predict_by_group(diam = run$diameter,
                                grp = run$group)
    }
    # re-join sampled attr onto the original row order and return the vector
    outTrees <- bind_rows(lapply(1:nrow(combos), predict_all)) %>%
      select(rec, attr)
    outPreds <- temp %>%
      left_join(outTrees, by = 'rec') %>%
      arrange(rec) %>%
      .$attr
    return(outPreds)
  }
  # bundle tables, settings, and the predictor closure
  outList <- list(
    propTab = propTab, propTabStand = propTabStand,
    predict_aux = predict_aux,
    attribute = attribute,
    spp = spp,
    sppGroup = sppGroup,
    htClass = htClass,
    groupVars = groupVars)
  return(outList)
}
```
- ## Merch height proportional assignment system
```{r merch_heightProportional, eval = eval_merch_height, results = 'markup'}
# TODO incorporate HFM-style two-stage stopper height modeling framework
# e.g. predict occurrence of stopper height, then predict stopper height for
# trees that get a stopper height
# if NA merch heights are trees on which merch height was not observed,
# filter those out.
# otherwise, do not filter out NAs since an NA might mean no stopper height
merchDat <- cruiseDataNoZero
#Saw stopper height
hsPropSet <- build_aux_model(trees = merchDat,
                             attribute = 'hs',
                             spp = TRUE,
                             sppGroup = NULL,
                             htClass = FALSE,
                             stand = FALSE,
                             groupVars = NULL)
predict_hs <- hsPropSet$predict_aux
#Pulp stopper height
hpPropSet <- build_aux_model(trees = merchDat,
                             attribute = 'hp',
                             spp = TRUE,
                             sppGroup = NULL,
                             htClass = FALSE,
                             stand = FALSE,
                             groupVars = NULL)
predict_hp <- hpPropSet$predict_aux
# full treelist with stand assignments for per-stand prediction
fullTl <- cruiseDataNoZero %>%
  left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  filter(!is.na(meta_stand_id))
stands <- unique(fullTl$meta_stand_id)
# predict merch height then summarize by diameter and species for each stand
compTab <- bind_rows(lapply(stands,
  function(stand) {
    standTl <- fullTl %>%
      filter(meta_stand_id == stand)
    standTl$hs_pred <- predict_hs(
      treeList = standTl,
      model = hsPropSet)
    standTl$hp_pred <- predict_hp(
      treeList = standTl,
      model = hpPropSet
    )
    out <- standTl %>%
      select(meta_stand_id, diameter, common, expansion,
             hs, hp, hs_pred, hp_pred) %>%
      gather(source, mht, hs_pred, hs, hp_pred, hp) %>%
      group_by(meta_stand_id, diameter, common, source) %>%
      summarize(meanMerchHeight = mean(mht, na.rm = TRUE),
                nTrees = n()) %>%
      # BUG FIX: observed pulp stopper heights ('hp') were previously
      # labeled 'predicted' because only 'hs' mapped to 'observed'
      mutate(source = ifelse(source %in% c('hs', 'hp'),
                             'observed', 'predicted'))
    # (unused tpa/bapa intermediate columns from the original were removed;
    # the summary only uses mht)
    return(out)
  })
)
# make plot a little better
compTab$source <- factor(compTab$source, levels = c('observed', 'predicted'))
ggplot(data = compTab, aes(x = diameter, y = meanMerchHeight,
                           color = source)) +
  geom_jitter(alpha = 0.5) +
  theme_bw() +
  facet_wrap(~ common,
             labeller = labeller(common = label_wrap_gen(width = 15))) +
  ylab('mean merchantable height (ft)') +
  xlab('DBH (in)')
# if you are happy with the proportional assignment, you can skip the parametric
# modeling section.
```
- ## Parametric merch height model
- ```{r merch_heightModel, eval = eval_merch_height}
- ### This default should change to beta regression
- #
- # merchTrain <- cruiseDataNoZero %>%
- # select(st_species, common, diameter, hm, hs, hp) %>%
- # mutate(predHt = predict(htModel,
- # newSpp = st_species,
- # newDiam = diameter,
- # speciesCode = "ST")) %>%
- # mutate(hs_pct = hs / hm,
- # pred_hs_pct = hs / predHt,
- # hp_pct = hp / hm,
- # pred_hp_pct = hp / predHt)
- #
- # # Counts the percentage of records that have a merch height recorded
- # pctData <- merchTrain %>%
- # mutate(hasHs = ifelse(!is.na(hs) & !is.na(hm), T, F),
- # hasHp = ifelse(!is.na(hp) & !is.na(hm), T, F)) %>%
- # summarize(hasHs = mean(hasHs),
- # hasHp = mean(hasHp))
- #
- # numHsPct <- pctData$hasHs * 100
- #
- # numHpPct <- pctData$hasHp * 100
- #
- # #
- # catMsg(paste(round(numHsPct, 2), "percent of the plot data had saw stopper and total
- # heights recorded", sep = " "))
- #
- # catMsg(paste(round(numHpPct, 2), "percent of the plot data had pulp stopper and total
- # heights recorded", sep = " "))
- #
- #
- # # If 15% or more trees have merch ht and
- # # total height, models are based on recorded data.
- # # If less than 15%, predicted total height will be
- # # used. 15% is arbitrary and can be changed.
- # if (numHsPct >= .15) {
- # hsModel <- lmer(asin(hs_pct) ~ diameter +
- # (1 + diameter | common),
- # data = merchTrain)
- # } else {
- # hsModel <- lmer(asin(pred_hs_pct) ~ diameter +
- # (1 + diameter | common),
- # data = merchTrain)
- #
- # warning("Predicted total heights were used")
- # }
- #
- # if (numHpPct >= .15) {
- # hpModel <- lmer(asin(hp_pct) ~ diameter +
- # (1 + diameter | common),
- # data = merchTrain)
- # } else {
- # hpModel <- lmer(asin(pred_hp_pct) ~ diameter +
- # (1 + diameter | common),
- # data = merchTrain)
- #
- # warning("Predicted total heights were used")
- # }
- #
- # summary(hsModel)
- # summary(hpModel)
- #
- # # predict method for asin - MODIFY if changing the model form
- # predict_merch <- function(treeList, model) {
- # predPcts <- predict(model$model, newdata = treeList,
- # allow.new.levels = T)
- #
- # predHeights <- treeList$total_height * sin(predPcts) # for arcsine transformation
- #
- # return(predHeights)
- # }
- #
- # hsModelSet <- list(hsModel = hsModel, hpModel = hpModel, predict_aux = predict_merch)
- #
- #
- # if (pctRecordTotHt >= .15) {
- # merchTrain$merch_hat <- predict_merch(treeList = merchTrain,
- # model = merchModelSet)
- # } else {
- # merchTrain$total_height <- merchTrain$predHt
- # merchTrain$merch_hat <- predict_merch(treeList = merchTrain,
- # model = merchModelSet)
- # }
- #
- # ggplot(merchTrain, aes(x = stop_height_1, y = merch_hat)) +
- # geom_point(alpha = 0.5) +
- # facet_wrap(~ common,
- # labeller = labeller(common = label_wrap_gen(width = 15))) +
- # theme_bw() +
- # geom_abline(intercept = 0, slope = 1) +
- # xlab("Observed merch height (feet)") +
- # ylab("Predicted merch height (feet)")
- #
- ```
Select either the parametric or the proportional merch model system. The default choice is the proportional assignment system. Change this if you want to use a statistical model.
```{r pickMerchModel, eval = eval_merch_height}
# by default, the merch model is the proportional assignment system
# change to merch Model if you desire to use the statistical model
hsSet <- hsPropSet  # saw stopper height model (proportional assignment)
hpSet <- hpPropSet  # pulp stopper height model (proportional assignment)
# merchSet <- merchModelSet
# Persist the selected model sets for downstream prediction scripts
save(hsSet, hpSet, file = file.path(modelDir, 'merchModel.Rda'))
```
- --------------
- Grade Model
- --------------
```{r grade, eval = eval_grade}
# TODO: Convert to segment/product handling:
# build a product field based on how we want to predict grade
# e.g. if multiple segments, slap segment lengths/grades into one field
# if NA grades are trees on which grade was not observed,
# filter those out.
# otherwise, do not filter out NAs since an NA might mean no grade assignment!
# table(cruiseDataNoZero$common, cruiseDataNoZero$segment_product_1, useNA = 'always')
gradeDat <- cruiseDataNoZero %>%
  filter(!is.na(tqi))
# Proportional-assignment model for tree grade ('tqi')
gradeSet <- build_aux_model(trees = gradeDat,
                            attribute = 'tqi',
                            spp = TRUE,
                            sppGroup = NULL,
                            htClass = FALSE,
                            stand = FALSE,
                            groupVars = NULL)
predict_grade <- gradeSet$predict_aux
# test it out
fullTl <- cruiseDataNoZero %>%
  left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  filter(!is.na(meta_stand_id))
stands <- unique(fullTl$meta_stand_id)
# predict grade then summarize by diameter and species for each stand
compTabGrade <- bind_rows(lapply(stands, function(stand) {
  standTl <- fullTl %>%
    filter(meta_stand_id == stand)
  standTl$grade_pred <- predict_grade(treeList = standTl,
                                      model = gradeSet)
  out <- standTl %>%
    select(meta_stand_id, diameter, common, expansion,
           tqi, grade_pred) %>%
    gather(source, grade, tqi, grade_pred, -meta_stand_id,
           -diameter, -common, -expansion) %>%
    mutate(tpa = expansion) %>%
    group_by(meta_stand_id, diameter, common, source, grade) %>%
    summarize(gradeTpa = sum(tpa)) %>%
    # grade is peeled off the grouping by summarize(), so this proportion
    # is each grade's share of TPA within stand/diameter/species/source
    mutate(gradeProp = gradeTpa / sum(gradeTpa)) %>%
    ungroup() %>%
    # BUG FIX: the gather() above emits source values 'tqi' and 'grade_pred';
    # the old test against 'segment_product_1' never matched, so every row
    # (including the observed grades) was labeled 'predicted'
    mutate(source = ifelse(source == 'tqi', 'observed', 'predicted'))
  return(out)
}))
# TODO think of a way to plot grade model diagnostics
save(gradeSet, file = file.path(modelDir, 'gradeModel.Rda'))
```
- --------------
- Crown Ratio Model
- --------------
- ## Crown ratio proportional assignment system
```{r crownRatio_proportional, eval = eval_crown}
# Proportional-assignment model for crown ratio ('thi')
crownDat <- cruiseDataNoZero %>%
  filter(!is.na(thi))
crownPropSet <- build_aux_model(trees = crownDat,
                                attribute = 'thi',
                                spp = TRUE,
                                sppGroup = NULL,
                                htClass = FALSE,
                                stand = FALSE,
                                groupVars = NULL)
# Attach stand assignments to the tree list via the training data
fullTl <- cruiseDataNoZero %>%
  left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  filter(!is.na(meta_stand_id))
stands <- unique(fullTl$meta_stand_id)
predict_crown <- crownPropSet$predict_aux
# predict crown ratio then summarize by diameter and species for each stand
compTabCrown <- bind_rows(lapply(stands, function(stand) {
  standTl <- fullTl %>%
    filter(meta_stand_id == stand)
  standTl$crownRatioPred <- predict_crown(treeList = standTl,
                                          model = crownPropSet)
  out <- standTl %>%
    select(meta_stand_id, diameter, common, expansion,
           thi, crownRatioPred) %>%
    gather(source, crown_ratio, thi, crownRatioPred, -meta_stand_id,
           -diameter, -common, -expansion) %>%
    mutate(tpa = expansion,
           bapa = calculate_inches_to_ba_ft2(diameter) * expansion) %>%
    group_by(meta_stand_id, diameter, common, source) %>%
    # na.rm = TRUE matches the merch height diagnostic; the observed
    # column can contain NAs in the unfiltered tree list
    summarize(meanCrownRatio = mean(crown_ratio, na.rm = TRUE),
              nTrees = n()) %>%
    # BUG FIX: gather() emits source values 'thi' and 'crownRatioPred';
    # the old test against 'crownRatio' never matched, so all rows were
    # labeled 'predicted'
    mutate(source = ifelse(source == 'thi', 'observed', 'predicted'))
  return(out)
}))
# make plot a little better
compTabCrown$source <- factor(compTabCrown$source,
                              levels = c('observed', 'predicted'))
# ggplot(data = compTabCrown, aes(x = diameter, y = meanCrownRatio,
#                                 color = source)) +
#   geom_point(alpha = 0.5) +
#   theme_bw() +
#   facet_wrap(~ common,
#              labeller = labeller(common = label_wrap_gen(width = 15))) +
#   ylab('mean crown ratio') +
#   xlab('DBH (in)')
# if you are happy with the proportional assignment, you can skip the parametric
# modeling section.
### DAMAGE ####
# Proportional-assignment model for thinning damage ('thinDamage')
damageDat <- cruiseDataNoZero %>%
  filter(!is.na(thinDamage))
damagePropSet <- build_aux_model(trees = damageDat,
                                 attribute = 'thinDamage',
                                 spp = TRUE,
                                 sppGroup = NULL,
                                 htClass = FALSE,
                                 stand = FALSE,
                                 groupVars = NULL)
fullTl <- cruiseDataNoZero %>%
  left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  filter(!is.na(meta_stand_id))
stands <- unique(fullTl$meta_stand_id)
predict_damage <- damagePropSet$predict_aux
# predict damage then summarize by diameter and species for each stand
compTabDamage <- bind_rows(lapply(stands, function(stand) {
  standTl <- fullTl %>%
    filter(meta_stand_id == stand)
  standTl$damagePred <- predict_damage(treeList = standTl,
                                       model = damagePropSet)
  out <- standTl %>%
    select(meta_stand_id, diameter, common, expansion,
           thinDamage, damagePred) %>%
    gather(source, damage, thinDamage, damagePred, -meta_stand_id,
           -diameter, -common, -expansion) %>%
    mutate(tpa = expansion,
           bapa = calculate_inches_to_ba_ft2(diameter) * expansion) %>%
    group_by(meta_stand_id, diameter, common, source) %>%
    summarize(meanDamage = mean(damage, na.rm = TRUE),
              nTrees = n()) %>%
    # BUG FIX: gather() emits source values 'thinDamage' and 'damagePred';
    # the old test against 'damage' never matched, so all rows were
    # labeled 'predicted'
    mutate(source = ifelse(source == 'thinDamage', 'observed', 'predicted'))
  return(out)
}))
# make plot a little better
compTabDamage$source <- factor(compTabDamage$source,
                               levels = c('observed', 'predicted'))
# ggplot(data = compTabDamage, aes(x = diameter, y = meanDamage,
#                                  color = source)) +
#   geom_point(alpha = 0.5) +
#   theme_bw() +
#   facet_wrap(~ common,
#              labeller = labeller(common = label_wrap_gen(width = 15))) +
#   ylab('mean damage') +
#   xlab('DBH (in)')
```
- ## Crown ratio statistical model
- ```{r crownRatio_parametric, eval = eval_crown}
- #
- # # this is a similar model form to the merch model option from above
- # crownModel <- lmer(asin(crownRatio) ~ diameter +
- # (1 + diameter | common),
- # data = crownDat)
- #
- # summary(crownModel)
- #
- # # predict method for asin transformed CR - MODIFY if changing the model form
- # predict_crown <- function(treeList, model) {
- # predCrownRatios <- sin(predict(model$model, newdata = treeList,
- # allow.new.levels = T))
- #
- # return(predCrownRatios)
- # }
- #
- # crownModelSet <- list(model = crownModel, predict_aux = predict_crown)
- #
- # crownDat$crownPred <- predict_crown(treeList = crownDat, model = crownModelSet)
- #
- #
- # ggplot(crownDat, aes(x = crownRatio, y = crownPred)) +
- # geom_point(alpha = 0.5) +
- # facet_wrap(~ common,
- # labeller = labeller(common = label_wrap_gen(width = 15))) +
- # theme_bw() +
- # geom_abline(intercept = 0, slope = 1) +
- # xlab("observed crown ratio") +
- # ylab("predicted crown ratio")
- ```
- ## Crown ratio model selection
- The default choice is the proportional assignment system. Change this if you want to use a statistical model.
```{r pickCrownModel, eval = eval_crown}
# by default, the crown ratio model is the proportional assignment system
crownSet <- crownPropSet    # crown ratio model (proportional assignment)
damageSet <- damagePropSet  # thinning damage model (proportional assignment)
# crownSet <- crownModelSet
# Persist the selected model sets for downstream prediction scripts
save(crownSet, damageSet, file = file.path(modelDir, 'crownModel.Rda'))
```
- -----------------------
- Taper Coefficients
- -----------------------
```{r volume, eval = eval_taper}
# Guard: taper fitting consumes height predictions, so the height model must
# have been fit earlier in this report.
# Scalar `if` condition: use short-circuit `&&` rather than vectorized `&`.
if (eval_taper && !eval_height) {
  stop('Tried to obtain volume coefficients without fitting height model')
}
# Predict total height for every tree, then overwrite predictions with
# field-measured heights wherever a measurement exists.
cruiseDataNoZero$ht_hat <- predict(htModel,
                                   newSpp = cruiseDataNoZero$st_species,
                                   newDiam = cruiseDataNoZero$diameter)
measuredHts <- cruiseDataNoZero$total_height
htTrees <- which(!is.na(measuredHts))
cruiseDataNoZero$ht_hat[htTrees] <- measuredHts[htTrees]
# Fit taper coefficients from the tree list plus stand-level covariates
taperCoeffs <- taper(species = cruiseDataNoZero$st_species,
                     diameter = cruiseDataNoZero$diameter,
                     expansion = cruiseDataNoZero$expansion,
                     height = cruiseDataNoZero$ht_hat,
                     bapa = medianBAPA,
                     latitude = lat,
                     longitude = lon,
                     gleanedData = gleanedList,
                     speciesCode = 'ST',
                     parallel = TRUE)
pandoc.table(taperCoeffs,
             split.tables = Inf,
             split.cells = Inf,
             justify = 'left',
             digits = 4)
# One taper diagnostic plot per species
for (sp in unique(cruiseDataNoZero$st_species)) {
  taperPlot <- plot(taperCoeffs, species = sp, speciesCode = 'ST')
  print(taperPlot)
}
save(taperCoeffs, file = file.path(modelDir, 'taperCoeffs.Rda'))
```
Add Comment
Please, Sign In to add comment