Weibull Gamma

# Load Weibull Gamma
data  = read.csv('BDA_WG/Summary.csv')
converge = read.csv('BDA_WG/Convergence.csv')
converge = subset(converge, language=='English')
converge$converged = "No"
converge$converged[converge$Rhat_k < 1.2 & converge$Rhat_l < 1.2 & converge$Rhat_delta < 1.2] = 'Yes'

message(paste('Percent Chains Converged:', 100*dim(converge[converge$converged == 'Yes',])[1]/dim(converge)[1]))
## Percent Chains Converged: 40.0250941028858
eng = merge(converge, data)
mcdi  = subset(read.csv('../Production/production-mcdi.csv'), language=='English')
engWG = eng[eng$converged=='Yes',c('item','item.id','k_Mean','lambda_Mean','delta_Mean')]

pred = merge(mcdi, engWG)
pred$BDA_WG = with(pred,pgamma((age/lambda_Mean)**delta_Mean, k_Mean, 1))

pred = pred[,c('item.id', 'item', 'language', 'age', 'produces', 'n', 'category', 'BDA_WG')]

Hidaka (2013)’s Weibull Gamma

# Load Hidaka's fits
hidaka = read.csv('hidaka.csv')
hidaka = hidaka[,c('item','WG_log_delta','WG_log_beta','WG_log_k')]
hidaka$k = exp(hidaka$WG_log_k)
hidaka$lambda = exp(hidaka$WG_log_delta)
hidaka$delta = exp(hidaka$WG_log_beta)

hidaka = merge(mcdi, hidaka)
hidaka$Hidaka_WG = with(hidaka,pgamma((age/lambda)**delta, k, 1))
hidaka = hidaka[,c('item','item.id','age','produces','n','category','Hidaka_WG')]

h = ddply(hidaka, .(item), summarise, Var=var(Hidaka_WG))
hidaka = merge(hidaka, h)
hidaka = hidaka[hidaka$Var!=0,]
hidaka$Var = NULL

Weibull Gamma with start time parameter

data  = read.csv('BDA_WGs/Summary.csv')
converge = read.csv('BDA_WGs/Convergence.csv')
converge = subset(converge, language=='English')
converge$converged = "No"
converge$converged[converge$Rhat_k < 1.2 & converge$Rhat_l < 1.2 & converge$Rhat_delta < 1.2 & converge$Rhat_s < 1.2] = 'Yes'

message(paste('Percent Chains Converged:', 100*dim(converge[converge$converged == 'Yes',])[1]/dim(converge)[1]))
## Percent Chains Converged: 91.8338108882521
eng = merge(converge, data)
engWGs = eng[eng$converged=='Yes',c('item','item.id','k_Mean','lambda_Mean','delta_Mean','start_Mean')]

pre = merge(mcdi, engWGs)
pre$BDA_WGs = with(pre,pgamma(((age-start_Mean)/lambda_Mean)**delta_Mean, k_Mean, 1))

bdaWgs = pre[,c('item.id', 'item', 'language', 'age', 'produces', 'n', 'category', 'BDA_WGs')]

Gamma Model with start time parameter

data  = read.csv('../Production/All_Data_Production_Summary.csv')
converge = read.csv('../Production/All_Data_Production_Convergence.csv')
converge = subset(converge, language=='English')
converge$converged = "No"
converge$converged[converge$Rhat_k < 1.2 & converge$Rhat_l < 1.2 & converge$Rhat_s < 1.2] = 'Yes'

message(paste('Percent Chains Converged:', 100*dim(converge[converge$converged == 'Yes',])[1]/dim(converge)[1]))
## Percent Chains Converged: 100
eng   = subset(data, language=='English')
engWGs = eng[,c('item','item.id','k_Mean','lambda_Mean','start_Mean')]

gam = merge(mcdi, engWGs)
gam$BDA_Gs = with(gam,pgamma(((age-start_Mean)*lambda_Mean), k_Mean, 1))

gam = gam[,c('item.id', 'item', 'language', 'age', 'produces', 'n', 'category', 'BDA_Gs')]

Model Comparison

pred$variable = colnames(pred)[dim(pred)[2]]
colnames(pred)[dim(pred)[2]-1] = 'value'

hidaka$variable = colnames(hidaka)[dim(hidaka)[2]]
colnames(hidaka)[dim(hidaka)[2]-1] = 'value'
hidaka$language = 'English'
hidaka = hidaka[,colnames(pred)]

bdaWgs$variable = colnames(bdaWgs)[dim(bdaWgs)[2]]
colnames(bdaWgs)[dim(bdaWgs)[2]-1] = 'value'

gam$variable = colnames(gam)[dim(gam)[2]]
colnames(gam)[dim(gam)[2]-1] = 'value'

pred.m = rbind(rbind(rbind(pred, hidaka),bdaWgs),gam)

mycor.test <- function(df) {
  cr <- cor.test(df$value, df$produces) 
  data.frame(estimate=cr$estimate, ymin=cr$conf.int[1], ymax=cr$conf.int[2])
}

m <- ddply( pred.m, c("variable"), mycor.test)
m$Model <- m$variable

m.w <- ddply( pred.m, c("item","variable"), mycor.test)
m.w$Model <- m.w$variable
m.w$Model = reorder.factor(m.w$Model, new.order=c('Hidaka_WG', 'BDA_WG', 'BDA_WGs', 'BDA_Gs'))

ggplot(m.w, aes(y=estimate**2, x=Model, ymin=ymin**2, ymax=ymax**2)) + 
  geom_jitter(alpha=0.1, aes(color=Model)) +
  stat_summary(fun.data=mean_cl_boot, geom='errorbar') +
  stat_summary(fun.y=mean, geom='point') +
  ylim(0,1) + theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) + 
  guides(color=F) +
  xlab("") + ylab("Coef. of Determination")