# code for fit exploration of tutorial part 4 # https://www.andreashandel.com/posts/longitudinal-multilevel-bayesian-analysis-4/ ## ---- packages2 -------- library('dplyr') # for data manipulation library('ggplot2') # for plotting library('cmdstanr') #for model fitting library('rethinking') #for model fitting library('fs') #for file path ## ---- predict_function -------- # defining a function so I don't need to re-type the same code predfunction <- function(fl,fitdat) { #this will contain all the predictions from the different models fitpred = vector(mode = "list", length = length(fl)) # we are looping over each fitted model for (n in 1:length(fl)) { #get current model nowmodel = fl[[n]]$fit #make new data for which we want predictions #specifically, more time points so the curves are smoother timevec = seq(from = 0.1, to = max(fitdat$time), length=100) Ntot = max(fitdat$id) #new data used for predictions #make variables for all versions of dose we use preddat = data.frame( id = sort(rep(seq(1,Ntot),length(timevec))), time = rep(timevec,Ntot), dose = 0, dose_adj = 0, dose_adj2 = 0, dose_cat = "0", dose_cat2 = 0 ) #add right dose information for each individual #this could likely be coded better for (k in 1:Ntot) { #get actual dose for a given individual #assign that dose #if statements because not every data set/model has each dose type if ("dose" %in% names(fitdat)) { nowdose = unique(fitdat$dose[fitdat$id == k]) preddat[(preddat$id == k),"dose"] = nowdose } if ("dose_adj" %in% names(fitdat)) { nowdose_adj = unique(fitdat$dose_adj[fitdat$id == k]) preddat[(preddat$id == k),"dose_adj"] = nowdose_adj } if ("dose_adj2" %in% names(fitdat)) { nowdose_adj2 = unique(fitdat$dose_adj2[fitdat$id == k]) preddat[(preddat$id == k),"dose_adj2"] = nowdose_adj2 } if ("dose_cat" %in% names(fitdat)) { nowdose_cat = unique(fitdat$dose_cat[fitdat$id == k]) preddat[(preddat$id == k),"dose_cat"] = as.character(nowdose_cat) } if ("dose_cat2" %in% names(fitdat)) { nowdose_cat2 = unique(fitdat$dose_cat2[fitdat$id == k]) preddat[(preddat$id == k),"dose_cat2"] = nowdose_cat2 } } # pull out posterior samples for the parameters post <- extract.samples(nowmodel) # estimate and CI for parameter variation # this uses the link function from rethinking # we ask for predictions for the new data generated above linkmod <- rethinking::link(nowmodel, data = preddat) #computing mean and various credibility intervals #these choices are inspired by the Statistical Rethinking book #and purposefully do not include 95% #to minimize thoughts of statistical significance #significance is not applicable here since we are doing bayesian fitting modmean <- apply( linkmod$mu , 2 , mean ) modPI79 <- apply( linkmod$mu , 2 , PI , prob=0.79 ) modPI89 <- apply( linkmod$mu , 2 , PI , prob=0.89 ) modPI97 <- apply( linkmod$mu , 2 , PI , prob=0.97 ) # estimate and CI for prediction intervals # this uses the sim function from rethinking # the predictions factor in additional uncertainty around the mean (mu) # as indicated by sigma simmod <- rethinking::sim(nowmodel, data = preddat) # mean and credible intervals for outcome predictions # modmeansim should agree with above modmean values modmeansim <- apply( simmod , 2 , mean ) modPIsim <- apply( simmod , 2 , PI , prob=0.89 ) #place all predictions into a data frame #and store in a list for each model fitpred[[n]] = data.frame(id = as.factor(preddat$id), dose = preddat$dose_cat, predtime = preddat$time, Estimate = modmean, Q79lo = modPI79[1,], Q79hi = modPI79[2,], Q89lo = modPI89[1,], Q89hi = modPI89[2,], Q97lo = modPI97[1,], Q97hi = modPI97[2,], Qsimlo=modPIsim[1,], Qsimhi=modPIsim[2,] ) } #end loop over all models return(fitpred) } #end function computing predictions ## ---- plot_function -------- # defining a function so I don't need to re-type the same code plotfunction <- function(fl,fitpred,fitdat) { #list for storing all plots plotlist = vector(mode = "list", length = length(fl)) #small data adjustment for plotting plotdat <- fitdat %>% data.frame() %>% mutate(id = as.factor(id)) %>% mutate(dose = dose_cat) #looping over all models, creating and storing a plot for each for (n in 1:length(fl)) { #adding titles to plots title = fl[[n]]$model plotlist[[n]] <- ggplot(data = fitpred[[n]], aes(x = predtime, y = Estimate, group = id, color = dose )) + geom_line(show.legend = F ) + geom_ribbon(aes(x=predtime, ymin=Q89lo, ymax=Q89hi, fill = dose, color = NULL), alpha=0.3, show.legend = F) + geom_ribbon(aes(x=predtime, ymin=Qsimlo, ymax=Qsimhi, fill = dose, color = NULL), alpha=0.1, show.legend = F) + geom_point(data = plotdat, aes(x = time, y = outcome, group = id, color = dose), shape = 1, size = 2, stroke = 2) + scale_y_continuous(limits = c(-30,50)) + labs(y = "Virus load", x = "days post infection") + theme_minimal() + ggtitle(title) } return(plotlist) } #end function making plots ## ---- load_dat2 -------- #loading previously saved fits. filepath = fs::path("D:","Dropbox","datafiles","longitudinalbayes","ulamfits_dat2", ext="Rds") if (!file_exists(filepath)) { filepath = fs::path("C:","Data","Dropbox","datafiles","longitudinalbayes","ulamfits_dat2", ext="Rds") } fl <- readRDS(filepath) fitdat <- fl[[1]]$fit@data ## ---- explore_dat2 -------- #Model 2a print(precis(fl[[1]]$fit,depth=1),digits = 2) #Model 4 a0mean = mean(precis(fl[[2]]$fit,depth=2,"a0")$mean) b0mean = mean(precis(fl[[2]]$fit,depth=2,"b0")$mean) print(precis(fl[[2]]$fit,depth=1),digits = 2) print(c(a0mean,b0mean)) ## ---- compare_dat2 -------- comp <- compare(fl[[1]]$fit,fl[[2]]$fit) print(comp) ## ---- predict_dat2 -------- fitpred <- predfunction(fl,fitdat) ## ---- plot_dat2 -------- plotlist <- plotfunction(fl,fitpred,fitdat) plot(plotlist[[1]]) plot(plotlist[[2]]) ## ---- load_big -------- #loading previously saved fits. filepath = fs::path("D:","Dropbox","datafiles","longitudinalbayes","ulamfits_big", ext="Rds") if (!file_exists(filepath)) { filepath = fs::path("C:","Data","Dropbox","datafiles","longitudinalbayes","ulamfits_big", ext="Rds") } fl <- readRDS(filepath) fitdat <- fl[[1]]$fit@data ## ---- explore_big -------- a0mean = mean(precis(fl[[1]]$fit,depth=2,"a0")$mean) b0mean = mean(precis(fl[[1]]$fit,depth=2,"b0")$mean) print(precis(fl[[1]]$fit,depth=1),digits = 2) print(c(a0mean,b0mean)) ## ---- predict_big -------- fitpred <- predfunction(fl,fitdat) ## ---- plot_big -------- plotlist <- plotfunction(fl,fitpred,fitdat) #update plot p1 <- plotlist[[1]] + scale_y_continuous(limits = c(-30,70)) plot(p1) #save plot so we can use it in the blog post ggsave(file = paste0("featured.png"), p1, dpi = 300, units = "in", width = 6, height = 6) ## ---- load_altpos -------- #loading previously saved fits. filepath = fs::path("D:","Dropbox","datafiles","longitudinalbayes","ulamfits_altpos", ext="Rds") if (!file_exists(filepath)) { filepath = fs::path("C:","Data","Dropbox","datafiles","longitudinalbayes","ulamfits_altpos", ext="Rds") } fl <- readRDS(filepath) fitdat <- fl[[1]]$fit@data ## ---- explore_altpos_m4a -------- #Model 4a a0mean = mean(precis(fl[[1]]$fit,depth=2,"a0")$mean) b0mean = mean(precis(fl[[1]]$fit,depth=2,"b0")$mean) print(precis(fl[[1]]$fit,depth=1),digits = 2) print(c(a0mean,b0mean)) ## ---- explore_altpos_m5 -------- #Model 5 print(precis(fl[[2]]$fit,depth=1),digits = 2) a0mean = mean(precis(fl[[2]]$fit,depth=2,"a0")$mean) b0mean = mean(precis(fl[[2]]$fit,depth=2,"b0")$mean) print(c(a0mean,b0mean)) ## ---- convert_altpos_m5 -------- # computing values that correspond to a1 and b1 a1est = (precis(fl[[2]]$fit,pars="a2")[1,]-1)*a0mean/max(fitdat$dose) b1est = (precis(fl[[2]]$fit,pars="b2")[1,]-1)*b0mean/max(fitdat$dose) print(c(a1est,b1est)) ## ---- compare_altpos -------- compare(fl[[1]]$fit,fl[[2]]$fit) ## ---- predict_altpos -------- fitpred <- predfunction(fl,fitdat) ## ---- plot_altpos -------- plotlist <- plotfunction(fl,fitpred,fitdat) plot(plotlist[[1]]) plot(plotlist[[2]]) ## ---- load_cat -------- #loading previously saved fits. filepath = fs::path("D:","Dropbox","datafiles","longitudinalbayes","ulamfits_cat", ext="Rds") if (!file_exists(filepath)) { filepath = fs::path("C:","Data","Dropbox","datafiles","longitudinalbayes","ulamfits_cat", ext="Rds") } fl <- readRDS(filepath) fitdat <- fl[[1]]$fit@data ## ---- explore_cat -------- #Model 4 print(precis(fl[[1]]$fit,depth=2),digits = 2) #Model 6 print(precis(fl[[2]]$fit,depth=2),digits = 2) ## ---- compare_cat -------- compare(fl[[1]]$fit,fl[[2]]$fit) ## ---- predict_cat -------- fitpred <- predfunction(fl,fitdat) ## ---- plot_cat -------- plotlist <- plotfunction(fl,fitpred,fitdat) plot(plotlist[[1]]) plot(plotlist[[2]])