Guest User

Untitled

a guest
Jun 20th, 2018
331
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 76.50 KB | None | 0 0
  1. ---
  2. title: "Plot Reduce Diagnostic Report (`r jobslug`)"
  3. author: "SilviaTerra, LLC"
  4. date: "`r as.character(format(Sys.time(), format = '%B %d %Y %X'))`"
  5. output: html_document
  6. ---
  7.  
  8. ```{r setup, include = F}
  9. library(boot)
  10. library(dplyr)
  11. library(doParallel)
  12. library(ggplot2)
  13. library(glmnet)
  14. library(lme4)
  15. library(nnet)
  16. library(parallel)
  17. library(pander)
  18. library(randomForest)
  19. library(tidyr)
  20. library(tsalgo)
  21. library(tsutils)
  22. library(stheight)
  23. library(stvolume)
  24. library(rstanarm)
  25. options(mc.cores = parallel::detectCores() - 2)
  26. library(bayesplot)
  27.  
  28. opts_chunk$set(echo = FALSE,
  29. message = FALSE,
  30. warning = FALSE,
  31. error = FALSE,
  32. cache = TRUE,
  33. dev = 'png',
  34. fig.path = figureDir,
  35. cache.path = cacheDir,
  36. fig.width = 10,
  37. fig.height = 10,
  38. results = 'asis',
  39. comment = NA)
  40.  
  41. options(show.signif.stars = FALSE)
  42.  
  43. ```
  44.  
  45. ```{r debug, eval = F}
  46. jobslug <- 'hfm-2018-f1-cc'
  47. get_prepped(jobslug, setup = T)
  48. groupSize <- 5
  49. ```
  50.  
  51. ```{r dataPrep, include = FALSE}
  52. cruiseData <- read_cruiseDat(cruiseJson)
  53.  
  54. #filter this two different ways
  55. cruiseDataNoZero <- cruiseData %>%
  56. filter(num_records > 0) %>%
  57. filter(!is.na(diameter))
  58.  
  59. trainingData <- read_td(trainingFile)
  60.  
  61. trainingData$tph <- as.integer(trainingData$tph)
  62.  
  63. ######## Define predictors for models
  64.  
  65. distroVarsIDX <- grep("diam", x = names(trainingData))
  66. impVarsIDX <- grep("spp", x = names(trainingData))
  67. impVarsIDX <- impVarsIDX[!grepl('meta', names(trainingData)[impVarsIDX])]
  68.  
  69. pcScoresIDXRaw <- grep('pca', x = names(trainingData))
  70. nonPcas <- grep('nonpca', x = names(trainingData))
  71.  
  72. pcScoresIDX <- pcScoresIDXRaw[pcScoresIDXRaw %nin% nonPcas]
  73. defaultPredictors <- paste0('pca', 1:length(pcScoresIDX), collapse = '+')
  74.  
  75. #Pull in the HFM strata table
  76. system('s3cmd get s3://silviaterra-sequoia/hfm-2018/hfm-strata-tab.csv /tmp/')
  77.  
  78. stratTab <- read.csv('/tmp/hfm-strata-tab.csv')
  79.  
  80. #Modify **AND RUN** to define properly!
  81. transform_the_predictors <- function(pixeldataframe) {
  82. #add any necessary transformations to the X variables
  83. # for example:
  84. #pixeldataframe$pca1cubed <- pixeldataframe$pca1 ^ 3
  85. # This is also a good place to deal with factors that will be used
  86. # in bespoke models.
  87. pixeldataframe <- left_join(pixeldataframe, stratTab, by = c('meta_Strata' = 'Strata')) %>%
  88. mutate(meta_megaStrata = megaStrata) %>%
  89. select(-CruiseType, -ForType, -megaStrata)
  90. return(pixeldataframe)
  91. }
  92.  
  93. trainingData <- transform_the_predictors(trainingData)
  94.  
  95. # check metadata for factor levels not in training data
  96. system(paste0('s3cmd get --force s3://silviaterra-sequoia/', jobslug, '/setup/bounds/metadata.csv /tmp/metadata.csv'))
  97. metadata <- read.csv('/tmp/metadata.csv')
  98.  
  99. tdStrata <- unique(trainingData$meta_megaStrata)
  100. allStrata <- metadata %>%
  101. left_join(stratTab) %>%
  102. select(megaStrata) %>%
  103. distinct() %>%
  104. .$megaStrata
  105.  
  106. if(!all(as.character(allStrata) %in% as.character(tdStrata))) {
  107. stop('there are strata in the metadata that are not present in training data')
  108. }
  109.  
  110. stemsPooledFormula <- as.formula(paste('tph + 1 ~ ', defaultPredictors))
  111. baPooledFormula <- as.formula(paste('baph + 1 ~ ', defaultPredictors))
  112.  
  113. sizeTest <- trainingData %>%
  114. dplyr::group_by(meta_stand_id) %>%
  115. dplyr::summarize(nPlots = n()) %>%
  116. dplyr::ungroup()
  117.  
  118. if (groupSize > max(sizeTest$nPlots)) {
  119. stop("Group size is larger than the maximum number of plots in any stand")
  120. }
  121.  
  122. if (groupSize < 4) {
  123. stop("Group size is less than 4. What are you thinking?")
  124. }
  125.  
  126. fullTrainingData <- trainingData %>%
  127. mutate(cruiser = cruiseData$cruiser[match(plot_id, cruiseData$plot_id)])
  128.  
  129. zeroPlots <- fullTrainingData$plot_id[grep('zero', fullTrainingData$cruiser)]
  130.  
  131. ```
  132.  
  133. ----------------------
  134.  
  135. Basal area models
  136. ----------------------
  137. ### BAPH POOLED MODEL
  138. ```{r baphPooled}
  139.  
  140. #Deprecated Stan mean model until we figure out a permanent solution
  141. # baphPooled <- stan_glm(baPooledFormula, data = fullTrainingData,
  142. # family = Gamma(link = 'log'))
  143.  
  144. #previous default pooled form - currently used for mean estimates
  145. baphPooledDefault <- glm(baPooledFormula, data = fullTrainingData,
  146. family = gaussian(link = 'log'),
  147. control = list(maxit = 1e3))
  148.  
  149. #currently being used to get precision estimates
  150. baphPrecisionDefault <- stan_glm(baPooledFormula, data = fullTrainingData,
  151. family = gaussian(link = 'identity'))
  152.  
  153.  
  154.  
  155. #Models using plot inclusion probabilities
  156. # totalAcres <- sum(metadata$stand_acreage)
  157. # cruisedStands <- unique(fullTrainingData$meta_stand_id)
  158. # cruiseWeights <- metadata %>%
  159. # filter(stand_id %in% cruisedStands) %>%
  160. # mutate(weight = stand_acreage / totalAcres) %>%
  161. # select(stand_id, weight)
  162. #
  163. #fullTrainingData <- left_join(fullTrainingData, stand_weights, by = c('meta_stand_id' = 'stand_id'))
  164. #
  165. # #currently being used to get precision estimates
  166. # baphPooledIdent <- stan_glm(baPooledFormula, data = fullTrainingData, weights = fullTrainingData$weight,
  167. # family = gaussian(link = 'identity'))
  168. #
  169. # #previous default pooled form - currently used for mean estimates
  170. # baphGlm <- glm(baPooledFormula, data = fullTrainingData, weights = fullTrainingData$weight,
  171. # family = gaussian(link = 'log'),
  172. # control = list(maxit = 1e3))
  173.  
  174.  
  175.  
  176.  
  177. ```
  178.  
  179. ```{r baphBespoke}
  180.  
  181. # cat('### Bespoke model')
  182. # ---------------------------------------------------------------------------
  183. ### BEGIN BESPOKE FITTING - fit by hand below
  184. # ---------------------------------------------------------------------------
  185.  
  186. bespokePredictors <- gsub('pca9+', '', defaultPredictors)
  187.  
  188. baBespokeFormula <- as.formula(paste('baph + 1 ~ ', bespokePredictors))
  189.  
  190. #previous default pooled form - currently used for mean estimates
  191. baphBespoke <- glm(baBespokeFormula, data = fullTrainingData,
  192. family = gaussian(link = 'log'))
  193.  
  194. #currently being used to get precision estimates
  195. baphBespokePrecision <- stan_glm(baBespokeFormula, data = fullTrainingData,
  196. family = gaussian(link = 'identity'), adapt_delta = 0.99)
  197.  
  198. if(exists('baphBespoke')){
  199. baphPooled <- baphBespoke
  200. baphPrecision <- baphBespokePrecision
  201. } else{
  202. baphPooled <- baphPooledDefault
  203. baphPrecision <- baphPrecisionDefault
  204. }
  205.  
  206. transform_the_baph_prediction <- function(predicted) {
  207. #add any necessary backtransformations to the baph value from prediction function
  208. # for example: default model glm(family = gaussian(link = 'log'))
  209. backtransformed <- exp(predicted) - 1
  210. #backtransformed <- predicted - 1 #Change this line as necessary!
  211. return(backtransformed)
  212. }
  213.  
  214. baphCoeffTable <- bind_cols(data.frame(baphPooled$coefficients),
  215. data.frame(baphPrecision$coefficients)) %>%
  216. mutate(coefficient = row.names(data.frame(baphPooled$coefficients)))
  217.  
  218. #Look at posterior predicted means and SD
  219. baphSims <- posterior_predict(baphPrecision) - 1
  220. baphMeans <- apply(baphSims, 1, mean)
  221.  
  222. #view baphMeans
  223. hist(baphMeans)
  224. #view relative precision of baph and tph
  225. sd(baphMeans) / mean(baphMeans)
  226.  
  227.  
  228. baphSD <- apply(baphSims, 2, function(x){sd(x)})
  229.  
  230. baphPredFrame <- data.frame(plot_id = fullTrainingData$plot_id,
  231. megaStrata = fullTrainingData$meta_megaStrata,
  232. observed = fullTrainingData$baph)
  233. baphPredFrame$stanPred <- colMeans(baphSims) - 1
  234. baphPredFrame$glmPred <- exp(predict(baphPooled)) - 1
  235.  
  236. baphPlotFrame <- baphPredFrame %>%
  237. gather(source, predicted, -plot_id, -observed, -megaStrata) %>%
  238. mutate(resid = predicted - observed)
  239.  
  240. baphRmseStan <- baphPlotFrame %>%
  241. filter(source == 'stanPred') %>%
  242. summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  243. .$rmse
  244.  
  245. baphRmseGlm <- baphPlotFrame %>%
  246. filter(source == 'glmPred') %>%
  247. summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  248. .$rmse
  249.  
  250. #####
  251. #CHECK MODEL CONVERGENCE
  252. #####
  253. #print out table of parameter means, summary stats, and convergence diagnostics
  254. pander(data.frame(summary(baphPrecision)))
  255.  
  256. #Look at plot of Rhats and effective sample size
  257. #Ideally, all Rhats are < 1.1 and all Neff are > 0.25
  258. #These are logical checks to ensure model converged
  259. plot(baphPrecision, 'rhat')
  260. #ratio of effective sample size over total iterations drawn
  261. plot(baphPrecision, 'neff_hist')
  262.  
  263. #Predicted vs. observed
  264. ggplot(data = baphPlotFrame, aes(x = observed, y = predicted, col = megaStrata)) +
  265. geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 1) +
  266. theme_bw() + facet_wrap(~source)
  267. #Observed vs. residuals
  268. ggplot(data = baphPlotFrame, aes(x = observed, y = resid, col = megaStrata)) +
  269. geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 0) +
  270. theme_bw() + facet_wrap(~source)
  271.  
  272. #Precision diagnostics, posterior predictive checks, and other diagnostics
  273. #Compare distribution of baph and tph (y is observed) to multiple simulated datasets from model (yrep)
  274. pp_check(baphPrecision) + ggtitle('baph') +
  275. ggtitle('We are not currently using this model.')
  276. #^^^ we aren't currently using this model
  277.  
  278. #Posterior predictive tests of median, upper, and lower quantiles
  279. ppc_stat(trainingData$baph, baphSims, stat = "mean") + ggtitle('Posterior predictive test of sample mean')
  280. ppc_stat(trainingData$baph, baphSims, stat = function(x){quantile(x, 0.95)}) + ggtitle('Posterior predictive test of upper (95%) quantile')
  281. ppc_stat(trainingData$baph, baphSims, stat = function(x){quantile(x, 0.05)}) + ggtitle('Posterior predictive test of lower (5%) quantile')
  282.  
  283. #Look at posterior predictive intervals of each plot from baphPrecision
  284. baphStanFrame <- baphPlotFrame %>% filter(source == 'stanPred')
  285. ppc_intervals(colMeans(baphSims) - 1, baphSims - 1, x = baphStanFrame$observed)
  286.  
  287. # TODO figure out why gaussian(link = 'log') gives bogus results
  288.  
  289. # TODO look into different default priors (rather than out of box)
  290. # e.g. lasso priors
  291.  
  292. # TODO look into QR transforming the predictors
  293.  
  294. # TODO incorporate plot inclusion probability into observation weights
  295.  
  296. ```
  297.  
  298. __baph default pooled GLM model RMSE is
  299. `r round(baphRmseGlm, digits = 2)` m2 per hectare from the model
  300.  
  301. ----------------------
  302.  
  303. Stem density models
  304. ----------------------
  305. ### TPH POOLED MODEL
  306. ```{r tphPooled}
  307.  
  308. #Deprecated Stan mean model until we figure out a permanent solution
  309. # tphPooledDefault <- stan_glm(stemsPooledFormula, data = fullTrainingData,
  310. # family = poisson(link = 'log'))
  311.  
  312. #previous default pooled form - currently used for mean estimates
  313. tphPooledDefault <- glm(stemsPooledFormula, data = fullTrainingData,
  314. family = gaussian(link = 'log'),
  315. control = list(maxit = 1e3))
  316.  
  317. #currently being used to get precision estimates
  318. tphPrecisionDefault <- stan_glm(stemsPooledFormula, data = fullTrainingData,
  319. family = gaussian(link = 'identity'))
  320.  
  321.  
  322.  
  323. #Models using plot inclusion probabilities
  324. # #currently being used to get precision estimates
  325. # tphPooledIdent <- stan_glm(stemsPooledFormula, data = fullTrainingData, weights = fullTrainingData$weight,
  326. # family = gaussian(link = 'identity'))
  327. #
  328. # #previous default pooled form - currently used for mean estimates
  329. # tphGlm <- glm(stemsPooledFormula, data = fullTrainingData, weights = fullTrainingData$weight,
  330. # family = gaussian(link = 'log'),
  331. # control = list(maxit = 1e3))
  332. ```
  333.  
  334. ```{r tphBespoke}
  335.  
  336. # cat('### Bespoke model')
  337. # ---------------------------------------------------------------------------
  338. ### BEGIN BESPOKE FITTING - fit by hand below
  339. # ---------------------------------------------------------------------------
  340. #
  341.  
  342. stemsBespokeFormula <- as.formula(paste('tph + 1 ~ ', bespokePredictors))
  343.  
  344. #previous default pooled form - currently used for mean estimates
  345. tphBespoke <- glm(stemsBespokeFormula, data = fullTrainingData,
  346. family = gaussian(link = 'log'))
  347.  
  348. #currently being used to get precision estimates
  349. tphBespokePrecision <- stan_glm(stemsBespokeFormula, data = fullTrainingData,
  350. family = gaussian(link = 'identity'), adapt_delta = 0.99)
  351.  
  352. if(exists('tphBespoke')){
  353. tphPooled <- tphBespoke
  354. tphPrecision <- tphBespokePrecision
  355. } else {
  356. tphPooled <- tphPooledDefault
  357. tphPrecision <- tphPrecisionDefault
  358. }
  359.  
  360. transform_the_tph_prediction <- function(predicted) {
  361. #add any necessary backtransformations to the tph value from prediction function
  362. # for example: default model glm(family = gaussian(link = 'log'))
  363. backtransformed <- exp(predicted) - 1
  364. #backtransformed <- predicted - 1 #Change this line as necessary!
  365. return(backtransformed)
  366. }
  367.  
  368.  
  369. tphCoeffTable <- bind_cols(data.frame(tphPooled$coefficients),
  370. data.frame(tphPrecision$coefficients)) %>%
  371. mutate(coefficient = row.names(data.frame(tphPooled$coefficients)))
  372.  
  373. #Look at posterior predicted means and SD
  374. tphSims <- posterior_predict(tphPrecision) - 1
  375. tphMeans <- apply(tphSims, 1, mean)
  376.  
  377. #view baphMeans
  378. hist(tphMeans)
  379. #view relative precision of baph and tph
  380. sd(tphMeans) / mean(tphMeans)
  381.  
  382.  
  383. tphSD <- apply(tphSims, 2, function(x){sd(x)})
  384.  
  385. tphPredFrame <- data.frame(plot_id = fullTrainingData$plot_id,
  386. megaStrata = fullTrainingData$meta_megaStrata,
  387. observed = fullTrainingData$tph)
  388. tphPredFrame$stanPred <- colMeans(tphSims) - 1
  389. tphPredFrame$glmPred <- exp(predict(tphPooled)) - 1
  390.  
  391. tphPlotFrame <- tphPredFrame %>%
  392. gather(source, predicted, -plot_id, -observed, -megaStrata) %>%
  393. mutate(resid = predicted - observed)
  394.  
  395. tphRmseStan <- tphPlotFrame %>%
  396. filter(source == 'stanPred') %>%
  397. summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  398. .$rmse
  399.  
  400. tphRmseGlm <- tphPlotFrame %>%
  401. filter(source == 'glmPred') %>%
  402. summarize(rmse = sqrt(mean(resid ^ 2))) %>%
  403. .$rmse
  404.  
  405. #####
  406. #CHECK MODEL CONVERGENCE
  407. #####
  408. #print out table of parameter means, summary stats, and convergence diagnostics
  409. pander(data.frame(summary(tphPrecision)))
  410.  
  411. #Look at plot of Rhats and effective sample size
  412. #Ideally, all Rhats are < 1.1 and all Neff are > 0.25
  413. #These are logical checks to ensure model converged
  414. plot(tphPrecision, 'rhat')
  415. #ratio of effective sample size over total iterations drawn
  416. plot(tphPrecision, 'neff_hist')
  417.  
  418. #Predicted vs. observed
  419. ggplot(data = tphPlotFrame, aes(x = observed, y = predicted, col = megaStrata)) +
  420. geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 1) +
  421. theme_bw() + facet_wrap(~source)
  422. #Observed vs. residuals
  423. ggplot(data = tphPlotFrame, aes(x = observed, y = resid, color = megaStrata)) +
  424. geom_point(alpha = 0.5) + geom_abline(intercept = 0, slope = 0) +
  425. theme_bw() + facet_wrap(~source)
  426.  
  427. #Precision diagnostics, posterior predictive checks, and other diagnostics
  428. #Compare distribution of baph and tph (y is observed) to multiple simulated datasets from model (yrep)
  429. pp_check(tphPrecision) + ggtitle('tph - we are not currently using this model')
  430. #^^^ we aren't currently using this model
  431.  
  432. #Posterior predictive tests of median, upper, and lower quantiles
  433. ppc_stat(trainingData$tph, tphSims, stat = "mean") + ggtitle('Posterior predictive test of sample mean')
  434. ppc_stat(trainingData$tph, tphSims, stat = function(x){quantile(x, 0.95)}) + ggtitle('Posterior predictive test of upper (95%) quantile')
  435. ppc_stat(trainingData$tph, tphSims, stat = function(x){quantile(x, 0.05)}) + ggtitle('Posterior predictive test of lower (5%) quantile')
  436.  
  437. #Look at posterior predictive intervals of each plot from baphPrecision
  438. tphStanFrame <- tphPlotFrame %>% filter(source == 'stanPred')
  439. ppc_intervals(colMeans(tphSims) - 1, tphSims - 1, x = tphStanFrame$observed)
  440.  
  441. # TODO figure out why gaussian(link = 'log') gives bogus results
  442.  
  443. # TODO look into different default priors (rather than out of box)
  444. # e.g. lasso priors
  445.  
  446. # TODO look into QR transforming the predictors
  447.  
  448. # TODO incorporate plot inclusion probability into observation weights
  449.  
  450.  
  451. ```
  452.  
  453.  
  454. __tph default pooled GLM model RMSE is
  455. `r round(tphRmseGlm, digits = 2)` stems per hectare from the model.__
  456.  
  457.  
  458. ```{r defineCaps}
  459.  
  460. pooled_baphCap <- quantile(trainingData$baph, 0.99) + sd(trainingData$baph)
  461. pooled_tphCap <- quantile(trainingData$tph, 0.99) + sd(trainingData$tph)
  462.  
  463. ```
  464. The baph cap is set at `r round(pooled_baphCap, 1) ` for pooled models.
  465. The tph cap is set at `r round(pooled_tphCap, 1)` for pooled models.
  466.  
  467.  
  468. ```{r ModelAssistedRatios}
  469.  
  470. trainingData$baphPred <- baphPredFrame$glmPred
  471. trainingData$tphPred <- tphPredFrame$glmPred
  472.  
  473.  
  474. ##Can manually change nPlots filter here if needed
  475.  
  476. # Model-assisted stand-level ratios
  477. standRatVals <- trainingData %>%
  478. filter(plot_id %nin% zeroPlots) %>%
  479. group_by(meta_stand_id) %>%
  480. summarize(baphObsMean = mean(baph),
  481. tphObsMean = mean(tph),
  482. baphPredMean = mean(baphPred),
  483. tphPredMean = mean(tphPred),
  484. nPlots = n()) %>%
  485. mutate(tphRat = tphObsMean / tphPredMean,
  486. baphRat = baphObsMean / baphPredMean) %>%
  487. filter(nPlots >= 5)
  488.  
  489. # Model-assisted strata-level ratios
  490.  
  491. # define grouping variable in metadata (without meta_ prefix)
  492. # if no grouping structure, set to NULL
  493. strataVar <- 'megaStrata'
  494.  
  495. # if no grouping variable, just get population-level ratios
  496. if(is.null(strataVar)) {
  497. strataRatios <- trainingData %>%
  498. filter(plot_id %nin% zeroPlots) %>%
  499. mutate(strataVar = 'population') %>%
  500. group_by(strataVar) %>%
  501. summarize(baphObsMean = mean(baph),
  502. tphObsMean = mean(tph),
  503. baphPredMean = mean(baphPred),
  504. tphPredMean = mean(tphPred),
  505. nPlots = n()) %>%
  506. mutate(tphRat = tphObsMean / tphPredMean,
  507. baphRat = baphObsMean / baphPredMean)
  508. } else {
  509. trainingData$strataVar <- trainingData[, paste0('meta_', strataVar)]
  510.  
  511. strataTab <- trainingData %>%
  512. select(meta_stand_id, strataVar) %>%
  513. distinct()
  514.  
  515. strataRatios <- trainingData %>%
  516. filter(plot_id %nin% zeroPlots) %>%
  517. group_by(strataVar) %>%
  518. summarize(baphObsMean = mean(baph),
  519. tphObsMean = mean(tph),
  520. baphPredMean = mean(baphPred),
  521. tphPredMean = mean(tphPred),
  522. nPlots = n()) %>%
  523. mutate(tphRat = tphObsMean / tphPredMean,
  524. baphRat = baphObsMean / baphPredMean)
  525. }
  526.  
  527. # model-assisted population level ratios
  528. populationRatios <- trainingData %>%
  529. filter(plot_id %nin% zeroPlots) %>%
  530. summarize(baphObsMean = mean(baph),
  531. tphObsMean = mean(tph),
  532. baphPredMean = mean(baphPred),
  533. tphPredMean = mean(tphPred),
  534. nPlots = n()) %>%
  535. mutate(tphRat = tphObsMean / tphPredMean,
  536. baphRat = baphObsMean / baphPredMean)
  537.  
  538. varianceTable <- trainingData %>%
  539. filter(plot_id %nin% zeroPlots) %>%
  540. filter(meta_stand_id %in% standRatVals$meta_stand_id) %>%
  541. mutate(BAPH_resid = baphPred - baph,
  542. TPH_resid = tphPred - tph) %>%
  543. group_by(meta_stand_id) %>%
  544. mutate(meanBAPH_resid= mean(BAPH_resid),
  545. meanTPH_resid = mean(TPH_resid)) %>%
  546. ungroup() %>%
  547. mutate(BAPH_errsquared = (BAPH_resid - meanBAPH_resid) ^ 2,
  548. TPH_errsquared = (TPH_resid - meanTPH_resid) ^ 2) %>%
  549. group_by(meta_stand_id) %>%
  550. left_join(., standRatVals) %>%
  551. summarize(n = length(tph),
  552. varBAPH_MA = sum(BAPH_errsquared) / (length(baph) - 1),
  553. varTPH_MA = sum(TPH_errsquared) / (length(tph) - 1),
  554. varBAPH_DB = sum((baph[plot_id %nin% zeroPlots] -
  555. unique(baphObsMean)) ^ 2) /
  556. (length(baph[plot_id %nin% zeroPlots]) - 1),
  557. varTPH_DB = sum((tph[plot_id %nin% zeroPlots] -
  558. unique(tphObsMean)) ^ 2) /
  559. (length(tph[plot_id %nin% zeroPlots]) - 1)) %>%
  560. select(meta_stand_id, n,
  561. varBAPH_DB, varTPH_DB,
  562. varBAPH_MA, varTPH_MA, n) %>%
  563. ungroup()
  564.  
  565. standRatios <- left_join(standRatVals, varianceTable) %>%
  566. filter(meta_stand_id != 'OC0201-2725')
  567.  
  568. # get MA population CIs
  569. metJoin <- trainingData %>%
  570. select(meta_stand_id, meta_stand_acre) %>%
  571. distinct()
  572.  
  573. maCIs <- varianceTable %>%
  574. mutate(baphMaSe = sqrt(varBAPH_MA / n),
  575. tphMaSe = sqrt(varTPH_MA / n)) %>%
  576. mutate(bapaMaSe = convert_baph_to_bapa(baphMaSe),
  577. tpaMaSe = tphMaSe / 2.47) %>%
  578. select(meta_stand_id, bapaMaSe, tpaMaSe, n) %>%
  579. left_join(metJoin) %>%
  580. summarize(bapaMaSePop = sqrt(
  581. sum((meta_stand_acre / sum(meta_stand_acre)) ^ 2 * bapaMaSe ^ 2)
  582. ),
  583. tpaMaSePop = sqrt(
  584. sum((meta_stand_acre / sum(meta_stand_acre)) ^ 2 * tpaMaSe ^ 2)
  585. ),
  586. nStands = n(), nPlots = sum(n)) %>%
  587. mutate(bapaCI = qt(1 - 0.1 / 2, df = nPlots - nStands) * bapaMaSePop,
  588. tpaCI = qt(1 - 0.1 / 2, df = nPlots - nStands) * tpaMaSePop)
  589.  
  590. pander(maCIs)
  591.  
  592. ```
  593.  
  594. --------------------------------
  595.  
  596. Diameter distribution models
  597. --------------------------------
  598.  
  599. ```{r distroFit}
  600.  
  601. # Necessary to register parallel clusters by hand
  602. cl <- makeForkCluster(detectCores())
  603. registerDoParallel(cl)
  604.  
  605. genericModelBuild <- function(N, ys, predVars, ...) {
  606.  
  607. if (is.null(ncol(ys))) { # in case only one spp.
  608. Gpsi <- ys
  609. } else {
  610. Gpsi <- ys[, N]
  611. }
  612.  
  613. dModelObject <- cbind(data.frame(y = Gpsi), predVars)
  614.  
  615. # TODO fit RF model, keep all trees
  616. GModelRF <- randomForest(y ~ .,
  617. data = dModelObject,
  618. ntree = 50, keep.forest = T,
  619. type = 'regression')
  620.  
  621. return(GModelRF)
  622. }
  623.  
  624. dist_to_drop <- which(is.na(fullTrainingData[, distroVarsIDX[1]]))
  625.  
  626. if(length(dist_to_drop) > 0) {
  627. fullTrainingDataNoNA <- fullTrainingData[-dist_to_drop, ] %>%
  628. filter(tph > 0)
  629. } else {
  630. fullTrainingDataNoNA <- fullTrainingData %>%
  631. filter(tph > 0)
  632. }
  633.  
  634. distroDefault <- lapply(1:length(distroVarsIDX), genericModelBuild,
  635. ys = fullTrainingDataNoNA[, distroVarsIDX],
  636. predVars = fullTrainingDataNoNA[, pcScoresIDX])
  637.  
  638. distroBespokePredictors <- NULL
  639.  
  640. # ------------------------------------------------------------------
  641. ### Bespoke diameter distribution example
  642. # ------------------------------------------------------------------
  643. distroBespokePredictors <- c(names(fullTrainingDataNoNA)[pcScoresIDX], 'meta_megaStrata')
  644. distroBespokePredictors <- distroBespokePredictors[distroBespokePredictors != 'pca9']
  645.  
  646. distroBespoke <- lapply(1:length(distroVarsIDX), genericModelBuild,
  647. ys = fullTrainingDataNoNA[, distroVarsIDX],
  648. predVars = fullTrainingDataNoNA[, distroBespokePredictors])
  649. #
  650. # -----------------------------------------------------------------
  651.  
  652. if(exists('distroBespoke')) {
  653. distroSet <- distroBespoke
  654. } else {
  655. distroSet <- distroDefault
  656. }
  657.  
  658. names(distroSet) <- names(fullTrainingDataNoNA)[distroVarsIDX]
  659.  
  660. ```
  661.  
  662. ```{r distroDiagnostics}
  663.  
  664. distroDF <- data.frame()
  665. for (i in 1:length(distroSet)) {
  666. tmpPred <- predict(distroSet[[i]], newdata = fullTrainingDataNoNA, predict.all = TRUE)
  667. tempDF <- data.frame(predicted = tmpPred$aggregate)
  668. tempDF$observed <- fullTrainingDataNoNA[, distroVarsIDX[i]]
  669. tempDF$meta_stand_id <- fullTrainingDataNoNA$meta_stand_id
  670. tempDF$plot_id <- fullTrainingDataNoNA$plot_id
  671. tempDF$meta_megaStrata <- fullTrainingDataNoNA$meta_megaStrata
  672. dbhClass <- names(fullTrainingDataNoNA)[distroVarsIDX][i]
  673. tempDF$dbhClass <- as.numeric(strsplit(dbhClass, split = '_')[[1]][2])
  674. tempDF$SD <- apply(tmpPred$individual, 1, function(x) {sd(x)})
  675. distroDF <- rbind(distroDF, tempDF)
  676. }
  677.  
  678. # normalize predicted distro for each plot (so all distro scores sum to one for each plot)
  679. normalized <- distroDF %>%
  680. select(plot_id, dbhClass, predicted) %>%
  681. group_by(plot_id) %>%
  682. mutate(predicted = ifelse(predicted < 0, 0, predicted)) %>%
  683. mutate(predNorm = predicted / sum(predicted)) %>%
  684. select(plot_id, dbhClass, predNorm)
  685.  
  686. distroDF <- distroDF %>%
  687. left_join(normalized)
  688.  
  689. distFit <- ggplot(distroDF, aes(x = observed, y = predicted, color = meta_megaStrata)) +
  690. facet_wrap(~ dbhClass) + geom_point() + theme_bw() +
  691. geom_abline(intercept = 0, slope = 1)
  692.  
  693. print(distFit)
  694.  
  695. DiaDataTidy <- distroDF %>%
  696. gather(key = variable, value = value, -dbhClass, -plot_id, -meta_stand_id,
  697. -meta_megaStrata, -SD, -predNorm)
  698.  
  699. distPlot <- ggplot(DiaDataTidy,
  700. aes(x = factor(dbhClass),
  701. y = value,
  702. color = variable)) +
  703. geom_boxplot(position = position_dodge(0.5), width = 1) +
  704. facet_wrap(~variable) + theme_bw() + xlab("Diameter class") +
  705. ylim(0, 1) + ylab("Trees per hectare") +
  706. scale_color_manual("", values = c("darkblue", "orange"))
  707.  
  708. print(distPlot)
  709.  
  710. #Box plot of average precision (standard deviation) per diameter class
  711. distroSDPlot <- ggplot(distroDF, aes(x = factor(dbhClass), y = SD)) + geom_boxplot() +
  712. theme_bw() + xlab('diameter class') + ylab('standard deviation')
  713. print(distroSDPlot)
  714.  
  715. ```
  716.  
  717. ```{r distroRatios, include = F}
  718.  
  719. if(is.null(strataVar)) {
  720. strataDistroRatios <- distroDF %>%
  721. filter(as.character(plot_id) %nin% zeroPlots) %>%
  722. mutate(strataVar = 'population') %>%
  723. group_by(strataVar, dbhClass) %>%
  724. summarize(meanPred = mean(predicted),
  725. meanObs = mean(observed)) %>%
  726. mutate(ratio = meanObs / meanPred) %>%
  727. mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  728. select(DClass = dbhClass, meanPred, meanObs, rat = ratio)
  729. } else {
  730. strataDistroRatios <- distroDF %>%
  731. filter(as.character(plot_id) %nin% zeroPlots) %>%
  732. left_join(strataTab) %>%
  733. group_by(strataVar, dbhClass) %>%
  734. summarize(meanPred = mean(predicted),
  735. meanObs = mean(observed)) %>%
  736. mutate(ratio = meanObs / meanPred) %>%
  737. mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  738. select(strataVar, DClass = dbhClass, meanPred, meanObs, rat = ratio)
  739. }
  740.  
  741. # model-assisted population level distro ratios
  742. populationDistroRatios <- distroDF %>%
  743. filter(as.character(plot_id) %nin% zeroPlots) %>%
  744. group_by(dbhClass) %>%
  745. summarize(meanPred = mean(predicted),
  746. meanObs = mean(observed)) %>%
  747. mutate(ratio = meanObs / meanPred) %>%
  748. mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  749. select(DClass = dbhClass, meanPred, meanObs, rat = ratio)
  750.  
  751. # model-assisted stand-level distro ratios
  752. standDistroRatios <- distroDF %>%
  753. filter(as.character(plot_id) %nin% zeroPlots) %>%
  754. filter(meta_stand_id %in% unique(standRatios$meta_stand_id)) %>%
  755. group_by(meta_stand_id, dbhClass) %>%
  756. summarize(meanPred = mean(predicted),
  757. meanObs = mean(observed)) %>%
  758. mutate(ratio = meanObs / meanPred) %>%
  759. select(stand = meta_stand_id, DClass = dbhClass, meanPred,
  760. meanObs, rat = ratio)
  761.  
  762. # obsDiams <- cruiseData$diameter[!is.na(cruiseData$diameter)]
  763. # N <- length(obsDiams)
  764. # N_pred <- length(obsDiams)
  765. #
  766. # weiFit <- stan(file = '/tmp/weibull-dist.stan', data = stanData)
  767. #
  768. # diamSims <- extract(weiFit, 'y_pred')$y_pred
  769.  
  770. ```
  771.  
  772. --------------------------
  773.  
  774. Importance Value Models
  775. --------------------------
  776.  
  777. ```{r importanceFit}
  778. speciesCols <- trainingData %>%
  779. select(contains('spp'), -contains('meta'))
  780. speciesLabs <- names(speciesCols)
  781.  
  782.  
  783. impBespokePredictors <- c(names(trainingData)[pcScoresIDX], 'meta_megaStrata')
  784. impBespokePredictors <- impBespokePredictors[impBespokePredictors %nin% c('pca9')]
  785. fit_spp_models <- function(spp,
  786. td,
  787. threshold = NULL,
  788. pcScoresIDX,
  789. bespokePredictors = NULL, # predictors in addition to pcas
  790. stan = F) { # use stan models?
  791. predictors <- bespokePredictors
  792.  
  793. training <- td %>%
  794. filter(baph > 0) %>%
  795. select(y = spp, one_of(predictors)) %>%
  796. mutate(present = ifelse(y > 0, 1, 0))
  797.  
  798. occTab <- training %>%
  799. group_by(present) %>%
  800. summarize(n = n()) %>%
  801. mutate(prop = n / sum(n))
  802.  
  803. # sometimes a species is on every plot, or it is on no plots (groupstrap)
  804. # a presence/absence model is not necessary in these cases
  805. if(nrow(occTab) == 1) {
  806. outcome <- occTab$present
  807.  
  808. presenceMod <- lm(outcome ~ 1)
  809. training$predProp <- predict(presenceMod, newdata = training)
  810. threshold <- 1
  811. } else {
  812. presentProp <- occTab %>%
  813. filter(present == 1) %>%
  814. .$prop
  815.  
  816. presentFormula <- as.formula(paste('present ~ ',
  817. paste0(predictors, collapse = '+')))
  818.  
  819.  
  820. # fit the presence/absence model
  821.  
  822. # if the species not present or absent on more than 95% of plots, fit
  823. # statistical model for presence/absence
  824. if(all(occTab$prop <= 0.95)) {
  825. # fit model in stan if desired
  826. if(stan) {
  827. tPrior <- student_t(df = 7, location = 0, scale = 2.5)
  828. presenceMod <- stan_glm(presentFormula, data = training,
  829. family = binomial(link = 'logit'),
  830. prior = tPrior, prior_intercept = tPrior,
  831. cores = detectCores() - 4)
  832.  
  833. #Calculate postProp and presAbs
  834. presentPred <- posterior_predict(presenceMod)
  835. training$predProp <- unname(round(colMeans(presentPred), 3))
  836. } else { # otherwise we will just use glm
  837. presenceMod <- glm(presentFormula, data = training,
  838. family = 'binomial')
  839. training$predProp <- predict(presenceMod, type = 'response')
  840. }
  841. } else { # if species presence/absence is highly imbalanced use random forest
  842. presenceMod <- randomForest(presentFormula, data = training)
  843. training$predProp <- predict(presenceMod)
  844. }
  845.  
  846.  
  847. # threshold will be the mean present/absent score above which results in
  848. # the same proportion of plots getting a 'present' assignment as there were plots
  849. # with the species in the training data
  850.  
  851. # the data-driven threshold can be overriden in the function call
  852. if(is.null(threshold)) {
  853. threshold <- quantile(training$predProp, 1 - presentProp)
  854. }
  855. }
  856.  
  857. training <- training %>%
  858. mutate(presAbs = ifelse(predProp >= threshold, 1, 0))
  859.  
  860. # fit importance model to plots where species is present
  861. trainingPresent <- training %>%
  862. filter(present == 1)
  863.  
  864. impFormula <- as.formula(paste('y ~ ', paste0(predictors, collapse = '+')))
  865.  
  866. # use regression if more than 5 plots with the species
  867. if(nrow(trainingPresent) > 5) {
  868. impMod <- randomForest(impFormula, data = trainingPresent,
  869. type = 'regression')
  870. } else { # use classification if 5 or fewer plots with the species
  871. if(nrow(trainingPresent) > 1) {
  872. trainingPresent$y <- factor(as.character(trainingPresent$y))
  873. impMod <- randomForest(impFormula,
  874. data = trainingPresent,
  875. type = 'classification')
  876. } else {
  877. # if only one plot, fit model to predict that plot's value
  878. if(nrow(trainingPresent) == 1) {
  879. impMod <- lm(y ~ 1, data = trainingPresent)
  880. } else { # if no plots (groupstrap) predict 0
  881. impMod <- lm(0 ~ 1)
  882. }
  883. }
  884.  
  885. }
  886.  
  887. outMods <- list(spp = spp,
  888. threshold = threshold,
  889. presenceMod = presenceMod,
  890. impMod = impMod,
  891. predictors = predictors)
  892.  
  893. return(outMods)
  894. }
  895.  
  896. # fit species models with data driven thresholds
  897. sppMods <- lapply(speciesLabs, fit_spp_models,
  898. td = trainingData,
  899. pcScoresIDX = pcScoresIDX,
  900. bespokePredictors = impBespokePredictors,
  901. stan = F)
  902.  
  903. names(sppMods) <- speciesLabs
  904.  
  905. # Function to predict species importance from sppMods
  906. predict_spp_importance <- function(sppMod, pixels) {
  907. pixels$present <- 0
  908.  
  909. # Set up tmp threshold
  910. threshold <- sppMod[['threshold']]
  911.  
  912. # Set up tmp binomial and importance models
  913. presenceMod <- sppMod[['presenceMod']]
  914. impMod <- sppMod[['impMod']]
  915. smoothMod <- sppMod[['smoothMod']]
  916.  
  917. # Predict presence/absence
  918. if('stanreg' %in% class(presenceMod)) {
  919. postPropSims <- posterior_predict(presenceMod, newdata = pixels)
  920. pixels$propPred <- colMeans(postPropSims)
  921. } else {
  922. pixels$propPred <- predict(presenceMod, newdata = pixels, type = 'response')
  923. }
  924.  
  925.  
  926. pixelPreds <- pixels %>%
  927. mutate(presAbsPred = ifelse(propPred > threshold, 1, 0)) %>%
  928. mutate(rawImp = as.numeric(as.character(predict(impMod, newdata = .)))) %>%
  929. mutate(sppImp = ifelse(presAbsPred == 1, rawImp, 0)) %>%
  930. select(presAbsPred, sppImp)
  931.  
  932. out <- pixelPreds %>%
  933. select(sppImp)
  934.  
  935. names(out) <- sppMod[[1]]
  936.  
  937. return(out)
  938.  
  939. }
  940.  
  941. # training data PCAs for testing
  942. pixels <- trainingData
  943.  
  944. sppPreds <- bind_cols(mclapply(sppMods,
  945. pixels = pixels,
  946. predict_spp_importance))
  947.  
  948. ```
  949.  
  950. ```{r importanceDiagnostics}
  951.  
  952. impDF <- data.frame()
  953.  
  954. for (i in 1:length(sppMods)) {
  955. sppName <- sppMods[[i]][[1]]
  956. tmpPred <- predict_spp_importance(sppMods[[i]], pixels = fullTrainingData)
  957. tempDF <- data.frame(predicted = unlist(tmpPred))
  958. tempDF$meta_stand_id <- fullTrainingData$meta_stand_id
  959. tempDF$plot_id <- fullTrainingData$plot_id
  960. tempDF$meta_megaStrata <- fullTrainingData$meta_megaStrata
  961. tempDF$observed <- fullTrainingData[, sppName]
  962. stID <- unlist(strsplit(sppName, split = '_'))[2]
  963. tempDF$species <- st_sp2$common[match(stID, st_sp2$stID)]
  964. tempDF$sppID <- sppName
  965. #tempDF$SD <- apply(tmpPred$individual, 1, function(x) {sd(x)})
  966. impDF <- rbind(impDF, tempDF)
  967. }
  968.  
  969.  
  970. impPlot <- ggplot(impDF, aes(x = observed, y = predicted, color = meta_megaStrata)) +
  971. geom_point(alpha = 0.5) +
  972. facet_wrap( ~ species) +
  973. theme_bw() +
  974. xlim(0, 1) + ylim(0, 1) +
  975. xlab("Observed species importance") +
  976. ylab("Predicted species importance") +
  977. geom_abline(slope = 1, intercept = 0, color = 'red')
  978.  
  979. print(impPlot)
  980.  
  981.  
  982. # #Species importance score precision
  983. # impSDPlot <- ggplot(impDF, aes(x = species, y = SD)) + geom_boxplot() +
  984. # theme_bw() + xlab('diameter class') + ylab('standard deviation')
  985. # print(impSDPlot)
  986.  
  987.  
  988. ```
  989.  
  990. ```{r impRatios}
  991.  
  992. if(is.null(strataVar)) {
  993. strataImpRatios <- impDF %>%
  994. filter(as.character(plot_id) %nin% zeroPlots) %>%
  995. mutate(strataVar = 'population') %>%
  996. group_by(strataVar, sppID) %>%
  997. summarize(meanPred = mean(predicted),
  998. meanObs = mean(observed)) %>%
  999. mutate(ratio = meanObs / meanPred) %>%
  1000. mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  1001. select(strataVar, sppID, meanPred, meanObs, rat = ratio)
  1002. } else {
  1003. strataImpRatios <- impDF %>%
  1004. filter(as.character(plot_id) %nin% zeroPlots) %>%
  1005. left_join(strataTab) %>%
  1006. group_by(strataVar, sppID) %>%
  1007. summarize(meanPred = mean(predicted),
  1008. meanObs = mean(observed)) %>%
  1009. mutate(ratio = meanObs / meanPred) %>%
  1010. mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  1011. select(strataVar, sppID, meanPred, meanObs, rat = ratio)
  1012. }
  1013.  
  1014. # model-assisted population level species importance ratios
  1015. populationImpRatios <- impDF %>%
  1016. filter(as.character(plot_id) %nin% zeroPlots) %>%
  1017. group_by(sppID) %>%
  1018. summarize(meanPred = mean(predicted),
  1019. meanObs = mean(observed)) %>%
  1020. mutate(ratio = meanObs / meanPred) %>%
  1021. mutate(ratio = ifelse(is.na(ratio), 1, ratio)) %>%
  1022. select(sppID, meanPred, meanObs, rat = ratio)
  1023.  
  1024. # model-assisted stand-level species importance ratios
  1025. standImpRatios <- impDF %>%
  1026. filter(as.character(plot_id) %nin% zeroPlots) %>%
  1027. filter(meta_stand_id %in% unique(standRatios$meta_stand_id)) %>%
  1028. group_by(meta_stand_id, sppID) %>%
  1029. summarize(meanPred = mean(predicted),
  1030. meanObs = mean(observed)) %>%
  1031. mutate(ratio = meanObs / meanPred) %>%
  1032. select(stand = meta_stand_id, sppID, meanPred,
  1033. meanObs, rat = ratio)
  1034.  
  1035. ```
  1036.  
  1037. ```{r computeEmpiricalMatrix}
  1038. # read in trees, set up predictors
  1039. wrapTrees <- cruiseDataNoZero %>%
  1040. mutate(diameterClass = round(diameter)) %>%
  1041. left_join(trainingData %>% mutate(plot_id = as.character(plot_id)))
  1042.  
  1043. sppNames <- names(trainingData)[grep('spp', names(trainingData))]
  1044. sppNames <- sppNames[!grepl('meta', sppNames)]
  1045.  
  1046. diamNames <- names(trainingData)[grep('diam_', names(trainingData))]
  1047. diamNames <- diamNames[!grepl('meta', diamNames)]
  1048.  
  1049. wrapPreds <- c('baph', 'tph', sppNames, diamNames)
  1050. predsSide <- paste0(wrapPreds, collapse = '+')
  1051. wrapFormula <- as.formula(paste0('common ~ ', predsSide))
  1052.  
  1053. # function that will build model to predict the species of a tree of a given
  1054. # diameter as a function of baph, tph, sppImp scores, and diam scores
  1055. wrap_by_diam <- function(diam) {
  1056. print(diam)
  1057. # subset of trees where diameter = diam
  1058. diamTraining <- wrapTrees %>%
  1059. filter(diameterClass == diam) %>%
  1060. mutate(roundedExpansion = round(expansion))
  1061.  
  1062. diamTraining <- diamTraining[rep(row.names(diamTraining), diamTraining$roundedExpansion), ]
  1063. distinctRows <- diamTraining %>%
  1064. distinct()
  1065.  
  1066. # build a model to predict species as a function of stocking and species.
  1067. if(length(unique(diamTraining$common)) > 2 & nrow(distinctRows) > 5) {
  1068. # fit multinomial model to predict probability of a given tree belonging
  1069. # to each species (use predict(mod, newdata, 'probs') to get probs)
  1070. mod <- nnet::multinom(wrapFormula,
  1071. data = diamTraining, MaxNWts = 10000,
  1072. maxit = 1000)
  1073.  
  1074. if('try-error' %in% class(mod)) {
  1075. stop(paste0('wrapMod failed to build for diameter class ', diam))
  1076. }
  1077.  
  1078. return(mod)
  1079. } else { # if we only observe trees in the diameter class on less than 5 plots,
  1080. # we assign empirical probability from training treelist
  1081. diamSpp <- unique(diamTraining$common)
  1082. sppTab <- table(diamTraining$common) / nrow(diamTraining)
  1083. sppFrame <- data.frame(t(matrix(sppTab)))
  1084. names(sppFrame) <- dimnames(sppTab)[[1]]
  1085. return(sppFrame)
  1086. }
  1087.  
  1088. }
  1089.  
  1090. # list of unique diameters
  1091. uniqueDiams <- wrapTrees %>%
  1092. arrange(diameterClass) %>%
  1093. select(diameterClass) %>%
  1094. distinct() %>%
  1095. .$diameterClass
  1096.  
  1097. # list of unique species names
  1098. uniqueCommons <- wrapTrees %>%
  1099. select(common) %>%
  1100. distinct() %>%
  1101. arrange(common) %>%
  1102. .$common
  1103.  
  1104. # build models for all diameter classes
  1105. wrapMods <- mclapply(uniqueDiams, wrap_by_diam, mc.cores = detectCores() - 2)
  1106. names(wrapMods) <- paste('diam', uniqueDiams, sep = '_')
  1107.  
  1108. # check to see if wrapMods will work
  1109. classCheck <- c()
  1110. for(i in 1:length(wrapMods)) {
  1111. classCheck <- c(classCheck, class(wrapMods[[i]]))
  1112. }
  1113.  
  1114. if(any(classCheck == 'try-error')) {
  1115. rm(list = 'wrapMods')
  1116. stop('wrapMods failed to write out properly')
  1117. }
  1118.  
  1119. ```
  1120.  
  1121. ```{r sppDiameterLimits}
  1122. # global diameter limits by species
  1123. populationCruiseLimits <- cruiseDataNoZero %>%
  1124. mutate(globalMinD = round(min(diameter)),
  1125. globalMaxD = round(max(diameter))) %>%
  1126. group_by(common) %>%
  1127. summarize(minD = round(min(diameter) - 2),
  1128. maxD = round(max(diameter) + 2),
  1129. globalMinD = unique(globalMinD),
  1130. globalMaxD = unique(globalMaxD)) %>%
  1131. mutate(minAllowD = ifelse(minD < globalMinD, globalMinD, minD),
  1132. maxAllowD = ifelse(maxD > globalMaxD, globalMaxD, maxD)) %>%
  1133. select(common, minAllowD, maxAllowD)
  1134.  
  1135.  
  1136. # strata-level diameter limits for each species
  1137. if(is.null(strataVar)) {
  1138. strataCruiseLimits <- cruiseDataNoZero %>%
  1139. left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  1140. mutate(globalMinD = round(min(diameter)),
  1141. globalMaxD = round(max(diameter))) %>%
  1142. mutate(strataVar = 'population') %>%
  1143. group_by(strataVar, common) %>%
  1144. summarize(minD = round(min(diameter) - 2),
  1145. maxD = round(max(diameter) + 2),
  1146. globalMinD = unique(globalMinD),
  1147. globalMaxD = unique(globalMaxD)) %>%
  1148. mutate(minAllowD = ifelse(minD < globalMinD, globalMinD, minD),
  1149. maxAllowD = ifelse(maxD > globalMaxD, globalMaxD, maxD)) %>%
  1150. select(strataVar, common, minAllowD, maxAllowD)
  1151. } else {
  1152. strataCruiseLimits <- cruiseDataNoZero %>%
  1153. left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  1154. mutate(globalMinD = round(min(diameter)),
  1155. globalMaxD = round(max(diameter))) %>%
  1156. group_by(strataVar, common) %>%
  1157. summarize(minD = round(min(diameter) - 2),
  1158. maxD = round(max(diameter) + 2),
  1159. globalMinD = unique(globalMinD),
  1160. globalMaxD = unique(globalMaxD)) %>%
  1161. mutate(minAllowD = ifelse(minD < globalMinD, globalMinD, minD),
  1162. maxAllowD = ifelse(maxD > globalMaxD, globalMaxD, maxD)) %>%
  1163. select(strataVar, common, minAllowD, maxAllowD)
  1164. }
  1165.  
  1166. ```
  1167.  
  1168. ---------------------
  1169.  
  1170. Core Groupstrap Results
  1171. ---------------------
  1172.  
  1173. ```{r groupStrap, results = 'hide'}
  1174.  
  1175. ##TODO: add a function here to iterate over each set of
  1176. #values in metadata and try predicting bapa and tpa
  1177.  
  1178.  
  1179. bootPolygons <- trainingData %>%
  1180. dplyr::group_by(meta_stand_id) %>%
  1181. dplyr::summarize(nPlots = length(baph),
  1182. observedBAPH = mean(baph),
  1183. seObservedBAPH = sd(baph) / sqrt(length(meta_stand_id)),
  1184. predBAPH = mean(baphPred),
  1185. observedTPH = mean(tph),
  1186. seObservedTPH = sd(tph) / sqrt(length(meta_stand_id)),
  1187. predTPH = mean(tphPred),
  1188. # 90% CI
  1189. tScore = qt(1 - (0.1 / 2), nPlots - 1)) %>%
  1190. dplyr::ungroup() %>%
  1191. dplyr::filter(nPlots >= groupSize) # change as needed
  1192.  
  1193. predict_for_one_group <- function(polygon) {
  1194. polygonIdx <- which(bootPolygons$meta_stand_id == polygon)
  1195. catMsg(paste('Polygon', polygonIdx, ":", polygon))
  1196. #subset training data for reduced model fitting
  1197. tdSub <- trainingData %>%
  1198. filter(meta_stand_id != polygon)
  1199.  
  1200. #subset training data for prediction to target polygon
  1201. tdApply <- trainingData %>%
  1202. filter(meta_stand_id == polygon)
  1203.  
  1204. baphGS <- baphPooledDefault
  1205. tphGS <- tphPooledDefault
  1206.  
  1207. baphModelSub <- update(baphGS, data = tdSub)
  1208. baPredSub <- exp(predict(baphModelSub,
  1209. newdata = tdApply)) - 1
  1210.  
  1211. tphModelSub <- update(tphGS, data = tdSub)
  1212. tphPredSub <- exp(predict(tphModelSub,
  1213. newdata = tdApply)) - 1
  1214. #
  1215. # # make distro and imp predictions using full models because
  1216. # # RF models take too long to refit repeatedly
  1217.  
  1218. distroBespokePredictors <- NULL
  1219.  
  1220. if(is.null(distroBespokePredictors)) {
  1221. distroGroupSub <- lapply(1:length(distroVarsIDX), genericModelBuild,
  1222. ys = tdSub[, distroVarsIDX],
  1223. predVars = tdSub[, pcScoresIDX])
  1224. } else {
  1225. distroGroupSub <- lapply(1:length(distroVarsIDX), genericModelBuild,
  1226. ys = tdSub[, distroVarsIDX],
  1227. predVars = tdSub[, distroBespokePredictors])
  1228. }
  1229. names(distroGroupSub) <- names(tdSub)[distroVarsIDX]
  1230.  
  1231. predict_distro_pixels <- function(pixelsData) {
  1232. runOne <- function(J) {
  1233. if(is.null(distroBespokePredictors)) {
  1234. newdata <- pixelsData[, pcScoresIDX]
  1235. } else {
  1236. newdata <- pixelsData[, distroBespokePredictors]
  1237. }
  1238. return(predict(distroGroupSub[[J]], newdata = newdata))
  1239. }
  1240. temp <- data.frame(do.call(cbind, lapply(X = 1:length(distroGroupSub),
  1241. FUN = runOne)))
  1242. names(temp) <- names(distroGroupSub)
  1243. return(temp)
  1244. }
  1245.  
  1246. distroFrame <- predict_distro_pixels(tdApply)
  1247. distroOut <- as.data.frame(t(colMeans(distroFrame)))
  1248.  
  1249. impPredictors <- c(names(trainingData)[pcScoresIDX])
  1250. impPredictors <- impPredictors[impPredictors != 'pca9']
  1251. sppModsSub <- lapply(speciesLabs, fit_spp_models,
  1252. td = tdSub,
  1253. pcScoresIDX = pcScoresIDX,
  1254. bespokePredictors = impPredictors,
  1255. stan = F)
  1256. impPredsSub <- bind_cols(mclapply(sppModsSub,
  1257. pixels = tdApply,
  1258. predict_spp_importance))
  1259.  
  1260. impOut <- as.data.frame(t(colMeans(impPredsSub))) / sum(colMeans(impPredsSub))
  1261.  
  1262. impOut[impOut < 0] <- 0
  1263. ##We want to make baph and tph figures:
  1264. #stand-wise full model
  1265. #stand-wise reduced model
  1266.  
  1267. #Diameter distribution by stand
  1268.  
  1269. #Species importance by stand
  1270. stockOut <- data.frame("meta_stand_id" = polygon,
  1271. "groupModelPredTPH" = mean(tphPredSub),
  1272. "groupModelPredBAPH" = mean(baPredSub))
  1273. #
  1274. output <- list(stockOut, distroOut, impOut)
  1275.  
  1276. return(output)
  1277. }
  1278. #
  1279. # # Perform groupstrap on all polygons:
  1280. bootResults <- lapply(bootPolygons$meta_stand_id,
  1281. predict_for_one_group)
  1282. #
  1283. # # Define function to pull from bootResults list
  1284. extract_from_list <- function(i, bootResults, place) {
  1285. return(bootResults[[i]][place][[1]])
  1286. }
  1287. #
  1288.  
  1289. # # Extract the tph and ba predictions:
  1290. bootPolygonsResults <- bootPolygons %>%
  1291. bind_cols(do.call(rbind, lapply(1:length(bootResults),
  1292. extract_from_list,
  1293. bootResults = bootResults,
  1294. place = 1)))
  1295. #
  1296. # # Extract the diameter distribution predictions:
  1297. distroPredictions <- bind_rows(lapply(1:length(bootResults),
  1298. extract_from_list,
  1299. bootResults = bootResults,
  1300. place = 2)) %>%
  1301. mutate(meta_stand_id = bootPolygons$meta_stand_id,
  1302. origin = 'model')
  1303. #
  1304. # # Extract the species importance predictions
  1305. impPredictions <- bind_rows(lapply(1:length(bootResults),
  1306. extract_from_list,
  1307. bootResults = bootResults,
  1308. place = 3)) %>%
  1309. mutate(meta_stand_id = bootPolygons$meta_stand_id,
  1310. origin = 'model')
  1311.  
  1312. ```
  1313.  
  1314. ### Basal Area per Hectare
  1315.  
  1316. ```{r groupBAPH}
  1317.  
  1318. baphTable <- bootPolygonsResults %>%
  1319. dplyr::select(meta_stand_id, nPlots, tScore, contains("BAPH")) %>%
  1320. dplyr::mutate(lowerCI = observedBAPH - tScore * seObservedBAPH,
  1321. upperCI = observedBAPH + tScore * seObservedBAPH) %>%
  1322. dplyr::select(meta_stand_id, nPlots, contains("CI"), contains("BAPH"))
  1323. #
  1324. pander(baphTable,
  1325. split.table = Inf,
  1326. justify = 'left',
  1327. digits = 2)
  1328. #
  1329. baphLong <- baphTable %>%
  1330. tidyr::gather(key = fit, value = predicted,
  1331. predBAPH, groupModelPredBAPH)
  1332. #
  1333. baphMax <- baphLong %>%
  1334. dplyr::select(upperCI, predicted) %>%
  1335. max() + 5
  1336. #
  1337. baphColors <- c("darkgreen", "darkseagreen")
  1338. densityLabels <- c("Full model pixel prediction",
  1339. "Groupstrap model pixel prediction")
  1340. #
  1341. baphFaceted <- ggplot(data = baphLong,
  1342. aes(x = observedBAPH, y = predicted, color = fit)) +
  1343. facet_wrap(~ fit) +
  1344. xlab('ST plot prediction') +
  1345. ylab('ST model prediction') +
  1346. ggtitle('Group bootstrap of basal area predictions to approximate model based performance') +
  1347. geom_point() +
  1348. geom_abline(intercept = 0, slope = 1) +
  1349. geom_abline(intercept = 0, slope = 1.1, linetype = 2) +
  1350. geom_abline(intercept = 0, slope = 0.9, linetype = 2) +
  1351. geom_errorbarh(aes(xmin = lowerCI, xmax = upperCI, size = nPlots)) +
  1352. xlim(0, baphMax) +
  1353. ylim(0, baphMax) +
  1354. theme_bw() +
  1355. scale_color_manual(values = baphColors, labels = densityLabels)
  1356.  
  1357. print(baphFaceted)
  1358.  
  1359. ```
  1360.  
  1361. ### Basal Area per Acre
  1362.  
  1363. ```{r clientBAPA}
  1364. #
  1365. bapaTable <- baphTable %>%
  1366. mutate_each(., funs(convert_baph_to_bapa), -meta_stand_id, -nPlots)
  1367.  
  1368. for(name in names(bapaTable)) {
  1369. names(bapaTable)[match(name, names(bapaTable))] <- gsub("BAPH", "BAPA", name)
  1370. }
  1371.  
  1372. bapaMax <- bapaTable %>%
  1373. select(upperCI, contains("BAPA")) %>%
  1374. max() + 5
  1375.  
  1376. clientLayers <- list(geom_point(),
  1377. geom_abline(intercept = 0, slope = 1),
  1378. geom_errorbarh(aes(xmin = lowerCI, xmax = upperCI)),
  1379. theme_bw(),
  1380. theme(text = element_text(size = 18),
  1381. title = element_text(size = 18)),
  1382. geom_abline(intercept = 0, slope = 1.1, linetype = 2),
  1383. geom_abline(intercept = 0, slope = 0.9, linetype = 2)
  1384. )
  1385.  
  1386. ggplot(bapaTable, aes(x = observedBAPA, y = groupModelPredBAPA)) +
  1387. xlab('Estimate of basal area (ft2/ac) from plot data') +
  1388. ylab('ST reduced model prediction of basal area (ft2/ac)') +
  1389. xlim(0, bapaMax) +
  1390. ylim(0, bapaMax) +
  1391. clientLayers
  1392.  
  1393. ggplot(bapaTable, aes(x = observedBAPA, y = predBAPA)) +
  1394. xlab('Estimate of basal area (ft2/ac) from plot data') +
  1395. ylab('ST model prediction of basal area (ft2/ac)') +
  1396. xlim(0, bapaMax) +
  1397. ylim(0, bapaMax) +
  1398. clientLayers
  1399.  
  1400. ```
  1401.  
  1402. ### Trees per Hectare
  1403.  
  1404. ```{r groupTPH}
  1405. #
  1406. tphTable <- bootPolygonsResults %>%
  1407. select(meta_stand_id, nPlots, tScore, contains("TPH")) %>%
  1408. mutate(lowerCI = observedTPH - tScore * seObservedTPH,
  1409. upperCI = observedTPH + tScore * seObservedTPH) %>%
  1410. select(meta_stand_id, nPlots, contains("CI"), contains("TPH"))
  1411.  
  1412. pander(tphTable, split.tables = Inf,
  1413. split.cells = Inf,
  1414. justify = 'left',
  1415. digits = 0)
  1416.  
  1417. tphLong <- tphTable %>%
  1418. gather(key = fit, value = predicted,
  1419. predTPH, groupModelPredTPH)
  1420.  
  1421. tphMax <- tphLong %>%
  1422. select(upperCI, predicted) %>%
  1423. max() + 5
  1424.  
  1425. tphColors <- c("darkblue", "deepskyblue")
  1426.  
  1427. tphGroupFacetPlot <- ggplot(data = tphLong,
  1428. aes(x = observedTPH, y = predicted, color = fit)) +
  1429. xlab('ST plot prediction') +
  1430. ylab('ST model prediction') +
  1431. geom_point() +
  1432. geom_abline(intercept = 0, slope = 1) +
  1433. geom_abline(intercept = 0, slope = 1.1, linetype = 2) +
  1434. geom_abline(intercept = 0, slope = 0.9, linetype = 2) +
  1435. geom_errorbarh(aes(xmin = lowerCI, xmax = upperCI, size = nPlots)) +
  1436. scale_color_manual("", values = tphColors, labels = densityLabels) +
  1437. theme(strip.background = element_blank(), strip.text.x = element_blank()) +
  1438. ggtitle('Group bootstrap of trees per hectare predictions to approximate model-based performance') +
  1439. xlim(0, tphMax) +
  1440. ylim(0, tphMax) +
  1441. theme_bw() +
  1442. facet_wrap( ~ fit)
  1443.  
  1444. print(tphGroupFacetPlot)
  1445.  
  1446. ```
  1447.  
  1448. ### Trees per Acre
  1449.  
  1450. ```{r clientTPA}
  1451. #
  1452. tpaTable <- tphTable %>%
  1453. mutate_each(., funs(convert_acres_to_hectares), -meta_stand_id, -nPlots)
  1454.  
  1455. for (name in names(tpaTable)) {
  1456. names(tpaTable)[match(name, names(tpaTable))] <- gsub("TPH", "TPA", name)
  1457. }
  1458.  
  1459. tpaMax <- tpaTable %>%
  1460. select(upperCI, contains("TPA")) %>%
  1461. max() + 5
  1462.  
  1463. tpaTreelistClientPlot <- ggplot(tpaTable, aes(x = observedTPA,
  1464. y = groupModelPredTPA)) +
  1465. xlab('Estimate of trees per acre from plot data') +
  1466. ylab('ST reduced model prediction of trees per acre') +
  1467. xlim(0, tpaMax) + ylim(0, tpaMax) + clientLayers
  1468.  
  1469. tpaFullModelClientPlot <- ggplot(tpaTable, aes(x = observedTPA,
  1470. y = predTPA)) +
  1471. xlab('Estimate of trees per acre from plot data') +
  1472. ylab('ST model prediction of trees per acre') +
  1473. xlim(0, tpaMax) + ylim(0, tpaMax) + clientLayers
  1474.  
  1475. print(tpaTreelistClientPlot)
  1476. print(tpaFullModelClientPlot)
  1477.  
  1478. ```
  1479.  
  1480. ### Diameter Distribution
  1481.  
  1482. ```{r groupDistro}
  1483. zeroTreeStands <- trainingData %>%
  1484. group_by(meta_stand_id) %>%
  1485. summarize(nPlots = n_distinct(plot_id),
  1486. nZeroes = length(unique(plot_id[tph == 0]))) %>%
  1487. filter(nPlots == nZeroes) %>%
  1488. .$meta_stand_id
  1489.  
  1490. fullDist <- trainingData %>%
  1491. dplyr::select(meta_stand_id, contains('diam')) %>%
  1492. group_by(meta_stand_id) %>%
  1493. dplyr::summarise_each(funs(mean)) %>%
  1494. ungroup() %>%
  1495. filter(meta_stand_id %in% bootPolygons$meta_stand_id) %>%
  1496. mutate(origin = 'trainingData') %>%
  1497. bind_rows(distroPredictions) %>%
  1498. gather(key = diamClass, value = value, -origin, -meta_stand_id) %>%
  1499. mutate(diamClass = as.numeric(gsub('diam_', '', diamClass))) %>%
  1500. filter(value > 0) %>%
  1501. filter(meta_stand_id %nin% zeroTreeStands)
  1502. #
  1503. ggplot(fullDist, aes(x = diamClass, y = value, linetype = origin)) +
  1504. geom_point(alpha = 0.1) + geom_line(alpha = 0.9) +
  1505. facet_wrap( ~ meta_stand_id) + theme_bw() +
  1506. ylab("Relative presence in distribution")
  1507.  
  1508. ```
  1509.  
  1510. ### Species Importance Value
  1511.  
  1512. ```{r groupImp}
  1513. #
  1514. bootSpp <- trainingData %>%
  1515. dplyr::select(meta_stand_id,
  1516. one_of(names(trainingData)[impVarsIDX]),
  1517. tph) %>%
  1518. group_by(meta_stand_id) %>%
  1519. filter(tph > 0 ) %>%
  1520. dplyr::summarise_each(funs(mean)) %>%
  1521. select(-tph) %>%
  1522. ungroup() %>%
  1523. filter(meta_stand_id %in% bootPolygons$meta_stand_id) %>%
  1524. mutate(origin = 'trainingData') %>%
  1525. bind_rows(impPredictions) %>%
  1526. gather(key = spp, value = value, -origin, -meta_stand_id) %>%
  1527. mutate(spp = as.numeric(gsub('spp_', '', spp)),
  1528. common = get_common(spp)) %>%
  1529. filter(meta_stand_id %nin% zeroTreeStands)
  1530.  
  1531. pal12 <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
  1532. "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A",
  1533. "#FFFF99", "#B15928")
  1534.  
  1535. #Get correct number of distinct, non-annoying colors:
  1536. colorCount <- length(unique(bootSpp$common))
  1537. useColors <- colorRampPalette(pal12)(colorCount)
  1538.  
  1539. impGroupBarPlot <- ggplot(bootSpp, aes(x = origin, y = value)) +
  1540. geom_bar(stat = "identity", aes(fill = common)) +
  1541. scale_fill_manual(values = useColors,
  1542. guide = guide_legend(nrow = 18)) +
  1543. facet_wrap(~ meta_stand_id) + theme_bw() +
  1544. ylab('Species Importance Value')
  1545.  
  1546. impGroupLinePlot <- ggplot(bootSpp, aes(x = common, y = value)) +
  1547. geom_line(aes(color = origin, group = origin)) +
  1548. ylab("Species importance") +
  1549. facet_wrap(~ meta_stand_id) + theme_bw() +
  1550. scale_color_manual(values = c("black", "skyblue2")) +
  1551. theme(axis.text.x=element_text(angle=45, hjust = 1))
  1552.  
  1553. print(impGroupBarPlot)
  1554.  
  1555. print(impGroupLinePlot)
  1556.  
  1557. ```
  1558.  
  1559. ------------
  1560.  
  1561. Memory Flush
  1562. ------------
  1563.  
  1564. ```{r saveCore}
  1565.  
  1566. # save objects
  1567.  
  1568. save(tphPooled, baphPooled, distroSet,
  1569. sppMods, predict_spp_importance,
  1570. baphPrecision, tphPrecision,
  1571. impBespokePredictors, distroBespokePredictors,
  1572. transform_the_predictors,
  1573. transform_the_baph_prediction,
  1574. transform_the_tph_prediction,
  1575. wrapMods,
  1576. strataVar,
  1577. populationCruiseLimits,
  1578. strataCruiseLimits, stratTab,
  1579. baphCoeffTable, tphCoeffTable,
  1580. baphPredFrame, tphPredFrame,
  1581. file = file.path(modelDir, 'core_models.Rda'))
  1582.  
  1583. save(standRatios,
  1584. strataRatios,
  1585. populationRatios,
  1586. strataVar,
  1587. strataDistroRatios,
  1588. populationDistroRatios,
  1589. standDistroRatios,
  1590. strataImpRatios,
  1591. populationImpRatios,
  1592. standImpRatios,
  1593. maCIs,
  1594. file = file.path(modelDir, 'standRatios.Rda'))
  1595.  
  1596. write.csv(baphTable, file.path(reportDir, "baphTable.csv"), row.names = F)
  1597. write.csv(tphTable, file.path(reportDir, "tphTable.csv"), row.names = F)
  1598. write.csv(bapaTable, file.path(reportDir, "bapaTable.csv"), row.names = F)
  1599. write.csv(tpaTable, file.path(reportDir, "tpaTable.csv"), row.names = F)
  1600.  
  1601. ```
  1602.  
  1603. --------------
  1604.  
  1605. Total Height Model
  1606. --------------
  1607.  
  1608. ```{r heightPrep, eval=eval_height}
  1609.  
  1610. medianBAPA <- median(trainingData$baph) * 4.356
  1611.  
  1612. # convert total/stopper heights to feet
  1613. cruiseDataNoZero$hm <- convert_meters_to_feet(cruiseDataNoZero$hm)
  1614. cruiseDataNoZero$hp <- convert_meters_to_feet(cruiseDataNoZero$hp)
  1615. cruiseDataNoZero$hs <- convert_meters_to_feet(cruiseDataNoZero$hs)
  1616.  
  1617. lat <- mean(parse_lat_lon(cruiseData$loc, coord = 'lat'))
  1618. lon <- mean(parse_lat_lon(cruiseData$loc, coord = 'lon'))
  1619.  
  1620. radius <- 150
  1621.  
  1622. gleanedFile <- file.path(modelDir, 'gleanedList.Rda')
  1623.  
  1624. if (file.exists(gleanedFile)) {
  1625. load(gleanedFile)
  1626. } else {
  1627.  
  1628. minSpp <- 0
  1629.  
  1630. while (radius < 250 & minSpp < 3) {
  1631. gleanedList <- glean_tree_records(lat, lon, radius)
  1632.  
  1633. gleanSummary <- gleanedList$trees %>%
  1634. mutate(st_species = fia2st(SPCD)) %>%
  1635. filter(st_species %in% unique(cruiseData$st_species)) %>%
  1636. group_by(st_species) %>%
  1637. summarize(count = length(st_species))
  1638.  
  1639. minSpp <- min(gleanSummary$count)
  1640. radius <- radius + 50
  1641. }
  1642.  
  1643. save(gleanedList, file = gleanedFile)
  1644. }
  1645.  
  1646. # we need to drop snags
  1647. cruiseDataHeight <- cruiseDataNoZero %>%
  1648. select(plot_id, st_species, common, diameter, expansion,
  1649. hm, tqi, start_height_1, hs, hp) %>%
  1650. left_join(trainingData %>%
  1651. mutate(plot_id = as.character(plot_id)) %>%
  1652. select(plot_id, baph, tph))
  1653.  
  1654. # create a plottable height dataset
  1655. cruiseDataHtPlot <- cruiseDataHeight %>%
  1656. select(common, diameter, hm, tqi, hs, hp) %>%
  1657. gather(heightType, height, -common, -diameter, -tqi) %>%
  1658. mutate(height_type = case_when(heightType == 'hm' ~ 'total',
  1659. heightType == 'hs' ~ 'saw_stopper',
  1660. heightType == 'hp' ~ 'pulp_stopper')) %>%
  1661. filter(!is.na(height))
  1662.  
  1663.  
  1664. totalAndMerchHtPlot <- ggplot(data = cruiseDataHtPlot,
  1665. aes(x = diameter, y = height, color = height_type)) +
  1666. geom_point(alpha = 0.4) +
  1667. facet_wrap(~ common, labeller = labeller(common = label_wrap_gen(width = 15))) +
  1668. theme_bw()
  1669.  
  1670. print(totalAndMerchHtPlot)
  1671. ```
  1672.  
  1673. ## FIA height models
  1674.  
  1675. ```{r heightFIA, eval=eval_height}
  1676.  
  1677. # drop species that aren't in FIA database
  1678. cruiseDataFia <- cruiseDataHeight %>%
  1679. mutate(SPCD = st2fia(st_species)) %>%
  1680. filter(SPCD %in% unique(gleanedList$trees$SPCD))
  1681.  
  1682. htModelFIA <- freq_height(species = cruiseDataFia$st_species,
  1683. diameter = cruiseDataFia$diameter,
  1684. height = cruiseDataFia$hm,
  1685. expansion = cruiseDataFia$expansion,
  1686. bapa = medianBAPA,
  1687. latitude = lat,
  1688. longitude = lon,
  1689. speciesCode = 'ST',
  1690. gleanedData = gleanedList)
  1691.  
  1692. cruiseDataHeight$predHeightFia <- predict(htModelFIA,
  1693. newSpp = cruiseDataHeight$st_species,
  1694. newDiam = cruiseDataHeight$diameter,
  1695. speciesCode = 'ST')
  1696.  
  1697. if(!all(is.na(cruiseDataHeight$hm))) {
  1698. fiaHtPlot <- ggplot(aes(x = hm, y = predHeightFia),
  1699. data = cruiseDataHeight) +
  1700. geom_point(alpha = 0.2) +
  1701. theme_bw() +
  1702. geom_abline() +
  1703. facet_wrap(~common) +
  1704. ggtitle('FIA height models')
  1705.  
  1706. print(fiaHtPlot)
  1707. }
  1708.  
  1709. ```
  1710.  
  1711. ## Local height models
  1712.  
  1713. ```{r heightLocal, eval=eval_height}
  1714. # drop species that have fewer than 5 observations
  1715. cruiseDataLocal <- cruiseDataHeight %>%
  1716. group_by(common) %>%
  1717. mutate(n = n()) %>%
  1718. filter(n >= 5) %>%
  1719. ungroup() %>%
  1720. filter(!is.na(hm))
  1721.  
  1722. # if no total height observations, do not try to fit local height models
  1723. if(nrow(cruiseDataLocal) > 0) {
  1724. htModelLocal <- freq_height(species = cruiseDataLocal$st_species,
  1725. diameter = cruiseDataLocal$diameter,
  1726. height = cruiseDataLocal$hm)
  1727.  
  1728. cruiseDataHeight$predHeightLocal <- predict(htModelLocal,
  1729. newSpp = cruiseDataHeight$st_species,
  1730. newDiam = cruiseDataHeight$diameter,
  1731. speciesCode = 'ST')
  1732.  
  1733. localHtPlot <- ggplot(aes(x = hm, y = predHeightLocal),
  1734. data = cruiseDataHeight) +
  1735. geom_point(alpha = 0.2) +
  1736. theme_bw() +
  1737. geom_abline() +
  1738. facet_wrap(~common, labeller =
  1739. labeller(common = label_wrap_gen(width = 15)))
  1740. print(localHtPlot)
  1741. }
  1742.  
  1743.  
  1744. ```
  1745.  
  1746. ## Final height models
  1747.  
  1748. Diagnostics for the final height model are displayed below. The default choice is the local freq_height model. Change this if you want to use the FIA model or a hybrid FIA/local moodel.
  1749.  
  1750. ```{r heightFinal, eval=eval_height}
  1751.  
  1752. # htModel <- htModelFIA
  1753. htModel <- htModelLocal
  1754.  
  1755. # make predictions to cruised trees using final model
  1756. cruiseDataHeight$predHeightFinal <- predict(htModel,
  1757. newSpp = cruiseDataHeight$st_species,
  1758. newDiam = cruiseDataHeight$diameter,
  1759. speciesCode = 'ST')
  1760.  
  1761. heightComp <- cruiseDataHeight %>%
  1762. select(diameter, common, expansion,
  1763. hm, predHeightFinal) %>%
  1764. gather(source, totalHeight, hm, predHeightFinal,
  1765. -diameter, -common, -expansion) %>%
  1766. mutate(source = case_when(source == 'total_height' ~ 'observed',
  1767. source == 'predHeightFinal' ~ 'predicted'))
  1768.  
  1769. htDiamPlot <- ggplot(data = heightComp,
  1770. aes(x = diameter, y = totalHeight,
  1771. color = source)) +
  1772. geom_point(alpha = 0.4) +
  1773. theme_bw() +
  1774. facet_wrap(~ common,
  1775. labeller =
  1776. labeller(common = label_wrap_gen(width = 15))) +
  1777. ylab('Total height (ft)') +
  1778. xlab('DBH (in)') +
  1779. ggtitle('Total height model predictions vs observed values')
  1780.  
  1781. print(htDiamPlot)
  1782. clientHt <- htDiamPlot +
  1783. scale_color_manual(values = c('#30363E', #stDarkGreen
  1784. '#347a35')) #stGrey
  1785.  
  1786. print(clientHt)
  1787.  
  1788. print(plot(htModel))
  1789.  
  1790. save(htModel, file = file.path(modelDir, 'htModel.Rda'))
  1791.  
  1792. ```
  1793.  
  1794. --------------
  1795.  
  1796. Merchantable Height Model
  1797. --------------
  1798.  
  1799. ```{r auxProportionalSystem}
  1800.  
  1801. # generic proportional assignment by diameter, spp or sppGroup, and height class
  1802. # can be used for any tree-level variable
  1803.  
  1804. build_aux_model <- function(trees, # modeling dataset
  1805. attribute, # name of attribute to be modeled
  1806. stand = F,
  1807. spp = F, # group by species?
  1808. sppGroup = NULL, # group by a species group from st_sp2 instead of common name?
  1809. htClass = F, # group by height class?
  1810. groupVars = NULL # other grouping variable from metadata?
  1811. ) {
  1812.  
  1813. # identify stands that will get stand-specific predictions
  1814. if(stand) {
  1815. modStands <- trainingData %>%
  1816. filter(plot_id %nin% zeroPlots) %>%
  1817. select(meta_stand_id, plot_id) %>%
  1818. group_by(meta_stand_id) %>%
  1819. summarize(nPlots = n()) %>%
  1820. filter(nPlots >= 5) %>%
  1821. .$meta_stand_id
  1822. } else {
  1823. modStands <- c()
  1824. }
  1825.  
  1826. dat <- trees %>%
  1827. left_join(trainingData %>% mutate(plot_id = as.character(plot_id)))
  1828.  
  1829. # define the group that each tree is in
  1830. if(spp == T) {
  1831. dat$group <- dat$common
  1832. }
  1833. if(!is.null(sppGroup)) {
  1834. if(sppGroup %nin% c('class', 'group', 'genus')) {
  1835. stop('sppGroup must be one of these: class, group, or genus')
  1836. }
  1837. dat$group <- st_sp2[[sppGroup]][match(dat$common, st_sp2$common)]
  1838. }
  1839. if(htClass == T) {
  1840. # predict height if tree does not have total height
  1841. dat$finalPredHeight <- predict(htModel,
  1842. newSpp = dat$st_species,
  1843. newDiam = dat$diameter,
  1844. speciesCode = 'ST')
  1845. # height for height class is either measured height, or predicted
  1846. dat$finalPredHeight[!is.na(dat$total_height)] <- dat$total_height[!is.na(dat$total_height)]
  1847.  
  1848. # maximum observed height rounded up to nearest 10
  1849. maxHt <- 10 * ceiling(max(dat$finalPredHeight, na.rm = T) / 10)
  1850.  
  1851. # 10 ft height classes
  1852. dat$htClass <- cut(dat$finalPredHeight,
  1853. breaks = seq(0, maxHt, by = 10),
  1854. labels = seq(5, maxHt - 5, by = 10))
  1855.  
  1856. # tack height class onto existing groupings
  1857. dat$group <- paste(dat$group, dat$htClass, sep = '_')
  1858. }
  1859.  
  1860. if(!is.null(groupVars)) {
  1861. dat <- dat %>%
  1862. mutate_at(vars(one_of(groupVars)), funs(as.character)) %>%
  1863. unite(group, one_of('group', groupVars), sep = '_')
  1864. }
  1865.  
  1866. # proportions in each group x diameter combination
  1867. tableVars <- c('group', 'diameter')
  1868. propTab <- dat %>%
  1869. select(one_of(tableVars), expansion,
  1870. attr = one_of(attribute)) %>%
  1871. mutate(diameter = round(diameter)) %>%
  1872. group_by_at(vars(one_of(tableVars), attr)) %>%
  1873. summarize(n = sum(expansion)) %>%
  1874. mutate(prop = n / sum(n)) %>%
  1875. select(-n) %>%
  1876. ungroup()
  1877.  
  1878.  
  1879. # proportions in each group x diameter x stand combination
  1880. standGroupVars <- c('meta_stand_id', 'group', 'diameter')
  1881. propTabStand <- dat %>%
  1882. select(one_of(standGroupVars), expansion,
  1883. attr = one_of(attribute)) %>%
  1884. mutate(diameter = round(diameter)) %>%
  1885. group_by_at(vars(one_of(standGroupVars), attr)) %>%
  1886. summarize(n = sum(expansion)) %>%
  1887. mutate(prop = n / sum(n)) %>%
  1888. select(-n) %>%
  1889. ungroup() %>%
  1890. filter(meta_stand_id %in% modStands)
  1891.  
  1892.  
  1893. predict_aux <- function(treeList, model) {
  1894.  
  1895. propTab <- model$propTab
  1896. propTabStand <- model$propTabStand
  1897. attribute <- model$attribute
  1898. spp <- model$spp
  1899. sppGroup <- model$sppGroup
  1900. groupVars <- model$groupVars
  1901. htClass <- model$htClass
  1902.  
  1903. temp <- treeList %>%
  1904. mutate(diameter = round(diameter)) %>%
  1905. mutate(rec = row_number())
  1906.  
  1907. if(spp == T) {
  1908. temp$group <- temp$common
  1909. }
  1910. if(!is.null(sppGroup)) {
  1911. temp$group <- st_sp2[[sppGroup]][match(temp$common, st_sp2$common)]
  1912. }
  1913. if(htClass == T) {
  1914. maxHt <- 10 * ceiling(max(temp$total_height, na.rm = T) / 10)
  1915. temp$htClass <- cut(temp$total_height,
  1916. breaks = seq(0, maxHt, by = 10),
  1917. labels = seq(5, maxHt - 5, by = 10))
  1918.  
  1919. temp$group <- paste(temp$group, temp$htClass, sep = '_')
  1920. }
  1921. if(!is.null(groupVars)) {
  1922. temp <- temp %>%
  1923. mutate_at(vars(one_of(groupVars)), funs(as.character)) %>%
  1924. unite(group, one_of('group', groupVars), sep = '_')
  1925. }
  1926.  
  1927. # unique diameter x group combinations
  1928. combos <- temp %>%
  1929. select(group, diameter) %>%
  1930. distinct() %>%
  1931. arrange(group, diameter)
  1932.  
  1933. predict_by_group <- function(grp, diam) {
  1934. subTemp <- temp %>%
  1935. filter(round(diameter) == diam,
  1936. group == grp)
  1937.  
  1938. # grab stand-level proportions if stand is in table
  1939. if(unique(subTemp$meta_stand_id) %in% unique(propTabStand$meta_stand_id)) {
  1940. probs <- propTabStand %>%
  1941. ungroup() %>%
  1942. filter(group == grp, meta_stand_id == unique(treeList$meta_stand_id)) %>%
  1943. mutate(diamDist = abs(diameter - diam)) %>%
  1944. filter(diamDist == min(diamDist)) %>%
  1945. select(-diamDist)
  1946.  
  1947. if(nrow(probs) < 1) {
  1948. probs <- propTab %>%
  1949. ungroup() %>%
  1950. filter(group == grp) %>%
  1951. mutate(diamDist = abs(diameter - diam)) %>%
  1952. filter(diamDist == min(diamDist)) %>%
  1953. select(-diamDist)
  1954. }
  1955. if(nrow(probs) < 1 & htClass) {
  1956. decomposed <- subTemp %>%
  1957. mutate(htClass = as.numeric(strsplit(grp, '_')[[1]][2]),
  1958. common = strsplit(grp, '_')[[1]][1])
  1959.  
  1960. spp <- unique(decomposed$common)
  1961. ht <- unique(decomposed$htClass)
  1962. probs <- propTab %>%
  1963. ungroup() %>%
  1964. separate(group, c('common', 'htClass'), sep = '_') %>%
  1965. mutate(htClass = as.numeric(htClass)) %>%
  1966. filter(common == spp) %>%
  1967. mutate(diamDist = abs(diameter - diam),
  1968. htDist = abs(htClass - ht)) %>%
  1969. filter(diamDist == min(diamDist)) %>%
  1970. filter(htDist == min(htDist)) %>%
  1971. select(-diamDist, -htDist)
  1972. }
  1973. } else { # otherwise used pooled proportions
  1974. probs <- propTab %>%
  1975. ungroup() %>%
  1976. filter(group == grp) %>%
  1977. mutate(diamDist = abs(diameter - diam)) %>%
  1978. filter(diamDist == min(diamDist)) %>%
  1979. select(-diamDist)
  1980. }
  1981.  
  1982.  
  1983. if(nrow(probs) > 1) {
  1984. subTemp$attr <- sample(probs$attr, size = nrow(subTemp),
  1985. prob = probs$prop, replace = T)
  1986. }
  1987.  
  1988. if(nrow(probs) == 1) {
  1989. subTemp$attr <- probs$attr
  1990. }
  1991.  
  1992.  
  1993. return(subTemp)
  1994. }
  1995.  
  1996. predict_all <- function(i) {
  1997. run <- combos[i, ]
  1998. preds <- predict_by_group(diam = run$diameter,
  1999. grp = run$group)
  2000. }
  2001.  
  2002. outTrees <- bind_rows(lapply(1:nrow(combos), predict_all)) %>%
  2003. select(rec, attr)
  2004.  
  2005. outPreds <- temp %>%
  2006. left_join(outTrees, by = 'rec') %>%
  2007. arrange(rec) %>%
  2008. .$attr
  2009.  
  2010. return(outPreds)
  2011.  
  2012. }
  2013.  
  2014. outList <- list(
  2015. propTab = propTab, propTabStand = propTabStand,
  2016. predict_aux = predict_aux,
  2017. attribute = attribute,
  2018. spp = spp,
  2019. sppGroup = sppGroup,
  2020. htClass = htClass,
  2021. groupVars = groupVars)
  2022.  
  2023.  
  2024. return(outList)
  2025. }
  2026.  
  2027. ```
  2028.  
  2029. ## Merch height proportional assignment system
  2030. ```{r merch_heightProportional, eval = eval_merch_height, results = 'markup'}
  2031.  
  2032. # TODO incorporate HFM-style two-stage stopper height modeling framework
  2033. # e.g. predict occurrence of stopper height, then predict stopper height for
  2034. # trees that get a stopper height
  2035.  
  2036. # if NA merch heights are trees on which merch height was not observed,
  2037. # filter those out.
  2038.  
  2039. # otherwise, do not filter out NAs since an NA might mean no stopper height
  2040. merchDat <- cruiseDataNoZero
  2041.  
  2042. #Saw stopper height
  2043. hsPropSet <- build_aux_model(trees = merchDat,
  2044. attribute = 'hs',
  2045. spp = T,
  2046. sppGroup = NULL,
  2047. htClass = F,
  2048. stand = F,
  2049. groupVars = NULL)
  2050.  
  2051. predict_hs <- hsPropSet$predict_aux
  2052.  
  2053. #Pulp stopper height
  2054. hpPropSet <- build_aux_model(trees = merchDat,
  2055. attribute = 'hp',
  2056. spp = T,
  2057. sppGroup = NULL,
  2058. htClass = F,
  2059. stand = F,
  2060. groupVars = NULL)
  2061.  
  2062. predict_hp <- hpPropSet$predict_aux
  2063.  
  2064.  
  2065. fullTl <- cruiseDataNoZero %>%
  2066. left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  2067. filter(!is.na(meta_stand_id))
  2068.  
  2069. stands <- unique(fullTl$meta_stand_id)
  2070.  
  2071. # predict merch height then summarize by diameter and species for each stand
  2072. compTab <- bind_rows(lapply(stands,
  2073. function(stand) {
  2074. standTl <- fullTl %>%
  2075. filter(meta_stand_id == stand)
  2076.  
  2077. standTl$hs_pred <- predict_hs(
  2078. treeList = standTl,
  2079. model = hsPropSet)
  2080.  
  2081. standTl$hp_pred <- predict_hp(
  2082. treeList = standTl,
  2083. model = hpPropSet
  2084. )
  2085.  
  2086. out <- standTl %>%
  2087. select(meta_stand_id, diameter, common, expansion,
  2088. hs, hp, hs_pred, hp_pred) %>%
  2089. gather(source, mht, hs_pred, hs, hp_pred, hp, -meta_stand_id,
  2090. -diameter, -common, -expansion) %>%
  2091. mutate(tpa = expansion,
  2092. bapa = calculate_inches_to_ba_ft2(diameter) * expansion) %>%
  2093. group_by(meta_stand_id, diameter, common, source) %>%
  2094. summarize(meanMerchHeight = mean(mht, na.rm = T),
  2095. nTrees = n()) %>%
  2096. mutate(source = ifelse(source == 'hs', 'observed', 'predicted'))
  2097.  
  2098. return(out)
  2099. })
  2100. )
  2101.  
  2102. # make plot a little better
  2103. compTab$source <- factor(compTab$source, levels = c('observed', 'predicted'))
  2104.  
  2105. ggplot(data = compTab, aes(x = diameter, y = meanMerchHeight,
  2106. color = source)) +
  2107. geom_jitter(alpha = 0.5) +
  2108. theme_bw() +
  2109. facet_wrap(~ common,
  2110. labeller = labeller(common = label_wrap_gen(width = 15))) +
  2111. ylab('mean merchantable height (ft)') +
  2112. xlab('DBH (in)')
  2113.  
  2114. # if you are happy with the proportional assignment, you can skip the parametric
  2115. # modeling section.
  2116.  
  2117.  
  2118. ```
  2119.  
  2120. ## Parametric merch height model
  2121. ```{r merch_heightModel, eval = eval_merch_height}
  2122. ### This default should change to beta regression
  2123. #
  2124. # merchTrain <- cruiseDataNoZero %>%
  2125. # select(st_species, common, diameter, hm, hs, hp) %>%
  2126. # mutate(predHt = predict(htModel,
  2127. # newSpp = st_species,
  2128. # newDiam = diameter,
  2129. # speciesCode = "ST")) %>%
  2130. # mutate(hs_pct = hs / hm,
  2131. # pred_hs_pct = hs / predHt,
  2132. # hp_pct = hp / hm,
  2133. # pred_hp_pct = hp / predHt)
  2134. #
  2135. # # Counts the percentage of records that have a merch height recorded
  2136. # pctData <- merchTrain %>%
  2137. # mutate(hasHs = ifelse(!is.na(hs) & !is.na(hm), T, F),
  2138. # hasHp = ifelse(!is.na(hp) & !is.na(hm), T, F)) %>%
  2139. # summarize(hasHs = mean(hasHs),
  2140. # hasHp = mean(hasHp))
  2141. #
  2142. # numHsPct <- pctData$hasHs * 100
  2143. #
  2144. # numHpPct <- pctData$hasHp * 100
  2145. #
  2146. # #
  2147. # catMsg(paste(round(numHsPct, 2), "percent of the plot data had saw stopper and total
  2148. # heights recorded", sep = " "))
  2149. #
  2150. # catMsg(paste(round(numHpPct, 2), "percent of the plot data had pulp stopper and total
  2151. # heights recorded", sep = " "))
  2152. #
  2153. #
  2154. # # If 15% or more trees have merch ht and
  2155. # # total height, models are based on recorded data.
  2156. # # If less than 15%, predicted total height will be
  2157. # # used. 15% is arbitrary and can be changed.
  2158. # if (numHsPct >= .15) {
  2159. # hsModel <- lmer(asin(hs_pct) ~ diameter +
  2160. # (1 + diameter | common),
  2161. # data = merchTrain)
  2162. # } else {
  2163. # hsModel <- lmer(asin(pred_hs_pct) ~ diameter +
  2164. # (1 + diameter | common),
  2165. # data = merchTrain)
  2166. #
  2167. # warning("Predicted total heights were used")
  2168. # }
  2169. #
  2170. # if (numHpPct >= .15) {
  2171. # hpModel <- lmer(asin(hp_pct) ~ diameter +
  2172. # (1 + diameter | common),
  2173. # data = merchTrain)
  2174. # } else {
  2175. # hpModel <- lmer(asin(pred_hp_pct) ~ diameter +
  2176. # (1 + diameter | common),
  2177. # data = merchTrain)
  2178. #
  2179. # warning("Predicted total heights were used")
  2180. # }
  2181. #
  2182. # summary(hsModel)
  2183. # summary(hpModel)
  2184. #
  2185. # # predict method for asin - MODIFY if changing the model form
  2186. # predict_merch <- function(treeList, model) {
  2187. # predPcts <- predict(model$model, newdata = treeList,
  2188. # allow.new.levels = T)
  2189. #
  2190. # predHeights <- treeList$total_height * sin(predPcts) # for arcsine transformation
  2191. #
  2192. # return(predHeights)
  2193. # }
  2194. #
  2195. # hsModelSet <- list(hsModel = hsModel, hpModel = hpModel, predict_aux = predict_merch)
  2196. #
  2197. #
  2198. # if (pctRecordTotHt >= .15) {
  2199. # merchTrain$merch_hat <- predict_merch(treeList = merchTrain,
  2200. # model = merchModelSet)
  2201. # } else {
  2202. # merchTrain$total_height <- merchTrain$predHt
  2203. # merchTrain$merch_hat <- predict_merch(treeList = merchTrain,
  2204. # model = merchModelSet)
  2205. # }
  2206. #
  2207. # ggplot(merchTrain, aes(x = stop_height_1, y = merch_hat)) +
  2208. # geom_point(alpha = 0.5) +
  2209. # facet_wrap(~ common,
  2210. # labeller = labeller(common = label_wrap_gen(width = 15))) +
  2211. # theme_bw() +
  2212. # geom_abline(intercept = 0, slope = 1) +
  2213. # xlab("Observed merch height (feet)") +
  2214. # ylab("Predicted merch height (feet)")
  2215. #
  2216.  
  2217. ```
  2218.  
  2219. Select either the parametric or a the proportional merch model system. The default choice is the proportional assignment system. Change this if you want to use a statistical model.
  2220.  
  2221. ```{r pickMerchModel, eval = eval_merch_height}
  2222. # by default, the merch model is the proportional assignment system
  2223. # change to merch Model if you desire to use the statistical model
  2224. hsSet <- hsPropSet
  2225. hpSet <- hpPropSet
  2226. # merchSet <- merchModelSet
  2227.  
  2228. save(hsSet, hpSet, file = file.path(modelDir, 'merchModel.Rda'))
  2229.  
  2230. ```
  2231.  
  2232. --------------
  2233.  
  2234. Grade Model
  2235. --------------
  2236.  
  2237. ```{r grade, eval = eval_grade}
  2238.  
  2239.  
  2240. # TODO: Convert to segment/product handling:
  2241. # build a product field based on how we want to predict grade
  2242. # e.g. if multiple segments, slap segment lengths/grades into one field
  2243.  
  2244.  
  2245. # if NA grades are trees on which grade was not observed,
  2246. # filter those out.
  2247.  
  2248. # otherwise, do not filter out NAs since an NA might mean no grade assignment!
  2249. # table(cruiseDataNoZero$common, cruiseDataNoZero$segment_product_1, useNA = 'always')
  2250.  
  2251. gradeDat <- cruiseDataNoZero %>%
  2252. filter(!is.na(tqi))
  2253.  
  2254. gradeSet <- build_aux_model(trees = gradeDat,
  2255. attribute = 'tqi',
  2256. spp = T,
  2257. sppGroup = NULL,
  2258. htClass = F,
  2259. stand = F,
  2260. groupVars = NULL)
  2261.  
  2262. predict_grade <- gradeSet$predict_aux
  2263.  
  2264. # test it out
  2265. fullTl <- cruiseDataNoZero %>%
  2266. left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  2267. filter(!is.na(meta_stand_id))
  2268.  
  2269. stands <- unique(fullTl$meta_stand_id)
  2270.  
  2271. # predict grade then summarize by diameter and species for each stand
  2272. compTabGrade <- bind_rows(lapply(stands,
  2273. function(stand) {
  2274. standTl <- fullTl %>%
  2275. filter(meta_stand_id == stand)
  2276.  
  2277. standTl$grade_pred <- predict_grade(
  2278. treeList = standTl,
  2279. model = gradeSet
  2280. )
  2281.  
  2282. out <- standTl %>%
  2283. select(meta_stand_id, diameter, common, expansion,
  2284. tqi, grade_pred) %>%
  2285. gather(source, grade, tqi, grade_pred, -meta_stand_id,
  2286. -diameter, -common, -expansion) %>%
  2287. mutate(tpa = expansion) %>%
  2288. group_by(meta_stand_id, diameter, common, source, grade) %>%
  2289. summarize(gradeTpa = sum(tpa)) %>%
  2290. mutate(gradeProp = gradeTpa / sum(gradeTpa)) %>%
  2291. ungroup() %>%
  2292. mutate(source = ifelse(source == 'segment_product_1', 'observed', 'predicted'))
  2293.  
  2294. return(out)
  2295. })
  2296. )
  2297.  
  2298. # TODO think of a way to plot grade model diagnostics
  2299.  
  2300.  
  2301. save(gradeSet, file = file.path(modelDir, 'gradeModel.Rda'))
  2302.  
  2303. ```
  2304.  
  2305.  
  2306. --------------
  2307.  
  2308. Crown Ratio Model
  2309. --------------
  2310.  
  2311. ## Crown ratio proportional assignment system
  2312.  
  2313. ```{r crownRatio_proportional, eval = eval_crown}
  2314.  
  2315. crownDat <- cruiseDataNoZero %>%
  2316. filter(!is.na(thi))
  2317.  
  2318. crownPropSet <- build_aux_model(trees = crownDat,
  2319. attribute = 'thi',
  2320. spp = T,
  2321. sppGroup = NULL,
  2322. htClass = F,
  2323. stand = F,
  2324. groupVars = NULL)
  2325.  
  2326. fullTl <- cruiseDataNoZero %>%
  2327. left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  2328. filter(!is.na(meta_stand_id))
  2329.  
  2330. stands <- unique(fullTl$meta_stand_id)
  2331.  
  2332. predict_crown <- crownPropSet$predict_aux
  2333.  
  2334. # predict merch height then summarize by diameter and species for each stand
  2335. compTabCrown <- bind_rows(lapply(stands,
  2336. function(stand) {
  2337. standTl <- fullTl %>%
  2338. filter(meta_stand_id == stand)
  2339.  
  2340. standTl$crownRatioPred <- predict_crown(
  2341. treeList = standTl,
  2342. model = crownPropSet
  2343. )
  2344.  
  2345. out <- standTl %>%
  2346. select(meta_stand_id, diameter, common, expansion,
  2347. thi, crownRatioPred) %>%
  2348. gather(source, crown_ratio, thi, crownRatioPred, -meta_stand_id,
  2349. -diameter, -common, -expansion) %>%
  2350. mutate(tpa = expansion,
  2351. bapa = calculate_inches_to_ba_ft2(diameter) * expansion) %>%
  2352. group_by(meta_stand_id, diameter, common, source) %>%
  2353. summarize(meanCrownRatio = mean(crown_ratio),
  2354. nTrees = n()) %>%
  2355. mutate(source = ifelse(source == 'crownRatio', 'observed', 'predicted'))
  2356.  
  2357. return(out)
  2358. })
  2359. )
  2360.  
  2361. # make plot a little better
  2362. compTabCrown$source <- factor(compTabCrown$source, levels = c('observed', 'predicted'))
  2363.  
  2364. # ggplot(data = compTabCrown, aes(x = diameter, y = meanCrownRatio,
  2365. # color = source)) +
  2366. # geom_point(alpha = 0.5) +
  2367. # theme_bw() +
  2368. # facet_wrap(~ common,
  2369. # labeller = labeller(common = label_wrap_gen(width = 15))) +
  2370. # ylab('mean crown ratio') +
  2371. # xlab('DBH (in)')
  2372.  
  2373. # if you are happy with the proportional assignment, you can skip the parametric
  2374. # modeling section.
  2375.  
  2376.  
  2377. ###DAMAGE####
  2378. damageDat <- cruiseDataNoZero %>%
  2379. filter(!is.na(thinDamage))
  2380.  
  2381. damagePropSet <- build_aux_model(trees = damageDat,
  2382. attribute = 'thinDamage',
  2383. spp = T,
  2384. sppGroup = NULL,
  2385. htClass = F,
  2386. stand = F,
  2387. groupVars = NULL)
  2388.  
  2389. fullTl <- cruiseDataNoZero %>%
  2390. left_join(trainingData %>% mutate(plot_id = as.character(plot_id))) %>%
  2391. filter(!is.na(meta_stand_id))
  2392.  
  2393. stands <- unique(fullTl$meta_stand_id)
  2394.  
  2395. predict_damage <- damagePropSet$predict_aux
  2396.  
  2397. # predict merch height then summarize by diameter and species for each stand
  2398. compTabDamage <- bind_rows(lapply(stands,
  2399. function(stand) {
  2400. standTl <- fullTl %>%
  2401. filter(meta_stand_id == stand)
  2402.  
  2403. standTl$damagePred <- predict_damage(
  2404. treeList = standTl,
  2405. model = damagePropSet
  2406. )
  2407.  
  2408. out <- standTl %>%
  2409. select(meta_stand_id, diameter, common, expansion,
  2410. thinDamage, damagePred) %>%
  2411. gather(source, damage, thinDamage, damagePred, -meta_stand_id,
  2412. -diameter, -common, -expansion) %>%
  2413. mutate(tpa = expansion,
  2414. bapa = calculate_inches_to_ba_ft2(diameter) * expansion) %>%
  2415. group_by(meta_stand_id, diameter, common, source) %>%
  2416. summarize(meanDamage = mean(damage),
  2417. nTrees = n()) %>%
  2418. mutate(source = ifelse(source == 'damage', 'observed', 'predicted'))
  2419.  
  2420. return(out)
  2421. })
  2422. )
  2423.  
  2424. # make plot a little better
  2425. compTabDamage$source <- factor(compTabDamage$source, levels = c('observed', 'predicted'))
  2426.  
  2427. # ggplot(data = compTabDamage, aes(x = diameter, y = meanDamage,
  2428. # color = source)) +
  2429. # geom_point(alpha = 0.5) +
  2430. # theme_bw() +
  2431. # facet_wrap(~ common,
  2432. # labeller = labeller(common = label_wrap_gen(width = 15))) +
  2433. # ylab('mean crown ratio') +
  2434. # xlab('DBH (in)')
  2435.  
  2436.  
  2437.  
  2438. ```
  2439.  
  2440. ## Crown ratio statistical model
  2441.  
  2442. ```{r crownRatio_parametric, eval = eval_crown}
  2443. #
  2444. # # this is a similar model form to the merch model option from above
  2445. # crownModel <- lmer(asin(crownRatio) ~ diameter +
  2446. # (1 + diameter | common),
  2447. # data = crownDat)
  2448. #
  2449. # summary(crownModel)
  2450. #
  2451. # # predict method for asin transformed CR - MODIFY if changing the model form
  2452. # predict_crown <- function(treeList, model) {
  2453. # predCrownRatios <- sin(predict(model$model, newdata = treeList,
  2454. # allow.new.levels = T))
  2455. #
  2456. # return(predCrownRatios)
  2457. # }
  2458. #
  2459. # crownModelSet <- list(model = crownModel, predict_aux = predict_crown)
  2460. #
  2461. # crownDat$crownPred <- predict_crown(treeList = crownDat, model = crownModelSet)
  2462. #
  2463. #
  2464. # ggplot(crownDat, aes(x = crownRatio, y = crownPred)) +
  2465. # geom_point(alpha = 0.5) +
  2466. # facet_wrap(~ common,
  2467. # labeller = labeller(common = label_wrap_gen(width = 15))) +
  2468. # theme_bw() +
  2469. # geom_abline(intercept = 0, slope = 1) +
  2470. # xlab("observed crown ratio") +
  2471. # ylab("predicted crown ratio")
  2472.  
  2473. ```
  2474.  
  2475. ## Crown ratio model selection
  2476. The default choice is the proportional assignment system. Change this if you want to use a statistical model.
  2477. ```{r pickCrownModel, eval = eval_crown}
  2478. # by default, the crown ratio model is the proportional assignment system
  2479. crownSet <- crownPropSet
  2480. damageSet <- damagePropSet
  2481. # crownSet <- crownModelSet
  2482.  
  2483. save(crownSet, damageSet, file = file.path(modelDir, 'crownModel.Rda'))
  2484.  
  2485. ```
  2486.  
  2487. -----------------------
  2488.  
  2489. Taper Coefficients
  2490. -----------------------
  2491.  
  2492. ```{r volume, eval=eval_taper}
  2493.  
  2494. if(eval_taper & !eval_height) {
  2495. stop('Tried to obtain volume coefficients without fitting height model')
  2496. }
  2497.  
  2498. cruiseDataNoZero$ht_hat <- predict(htModel, newSpp = cruiseDataNoZero$st_species,
  2499. newDiam = cruiseDataNoZero$diameter)
  2500.  
  2501. measuredHts <- cruiseDataNoZero$total_height
  2502. htTrees <- which(!is.na(measuredHts))
  2503. cruiseDataNoZero$ht_hat[htTrees] <- measuredHts[htTrees]
  2504.  
  2505. taperCoeffs <- taper(species = cruiseDataNoZero$st_species,
  2506. diameter = cruiseDataNoZero$diameter,
  2507. expansion = cruiseDataNoZero$expansion,
  2508. height = cruiseDataNoZero$ht_hat,
  2509. bapa = medianBAPA,
  2510. latitude = lat,
  2511. longitude = lon,
  2512. gleanedData = gleanedList,
  2513. speciesCode = 'ST',
  2514. parallel = T)
  2515.  
  2516. pandoc.table(taperCoeffs,
  2517. split.tables = Inf,
  2518. split.cells = Inf,
  2519. justify = 'left',
  2520. digits = 4)
  2521.  
  2522. for(sp in unique(cruiseDataNoZero$st_species)) {
  2523. taperPlot <- plot(taperCoeffs, species = sp, speciesCode ='ST')
  2524. print(taperPlot)
  2525. }
  2526.  
  2527. save(taperCoeffs, file = file.path(modelDir, 'taperCoeffs.Rda'))
  2528.  
  2529. ```
Add Comment
Please, Sign In to add comment