## ----preliminaries, include = FALSE------------------------------------------- library("betareg") data("GasolineYield", package = "betareg") data("FoodExpenditure", package = "betareg") gy_loglog <- betareg(yield ~ batch + temp, data = GasolineYield, link = "loglog") fe_beta2 <- betareg(I(food/income) ~ income + persons | persons, data = FoodExpenditure) knitr::opts_chunk$set( engine = "R", collapse = TRUE, comment = "##", message = FALSE, warning = FALSE, echo = TRUE ) options(width = 70, prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE, digits = 5) ## ----beta-distributions------------------------------------------------------- #| echo: false #| fig-width: 8.5 #| fig-height: 4.5 #| out-width: 100% #| label: fig-beta-distributions #| fig-cap: "Probability density functions for beta distributions with varying parameters $\\mu = 0.10, 0.25, 0.50, 0.75, 0.90$ and $\\phi = 5$ (left) and $\\phi = 100$ (right)." par(mfrow = c(1, 2), mar = c(4.1, 4.1, 4.1, 0.1)) dbeta2 <- function(x, mu, phi = 1) dbeta(x, mu * phi, (1 - mu) * phi) x <- seq(from = 0.01, to = 0.99, length = 200) xx <- cbind(x, x, x, x, x) yy <- cbind( dbeta2(x, 0.10, 5), dbeta2(x, 0.25, 5), dbeta2(x, 0.50, 5), dbeta2(x, 0.75, 5), dbeta2(x, 0.90, 5) ) matplot(xx, yy, type = "l", xlab = "y", ylab = "Density", main = expression(phi == 5), lty = 1, col = "black", ylim = c(0, 15)) text(0.05, 12 , "0.10") text(0.95, 12 , "0.90") text(0.22, 2.8, "0.25") text(0.78, 2.8, "0.75") text(0.50, 2.3, "0.50") yy <- cbind( dbeta2(x, 0.10, 100), dbeta2(x, 0.25, 100), dbeta2(x, 0.50, 100), dbeta2(x, 0.75, 100), dbeta2(x, 0.90, 100) ) matplot(xx, yy, type = "l", xlab = "y", ylab = "", main = expression(phi == 100), lty = 1, col = "black", ylim = c(0, 15)) text(0.10, 14.5, "0.10") text(0.90, 14.5, "0.90") text(0.25, 9.8, "0.25") text(0.75, 9.8, "0.75") text(0.50, 8.6, "0.50") ## ----eval=FALSE--------------------------------------------------------------- ## betareg(formula, data, subset, na.action, weights, offset, ## link = "logit", link.phi = NULL, control = betareg.control(...), ## model = TRUE, y = TRUE, x = FALSE, ...) ## ----GasolineYield-betareg---------------------------------------------------- data("GasolineYield", package = "betareg") gy_logit <- betareg(yield ~ batch + temp, data = GasolineYield) summary(gy_logit) ## ----GasolineYield-visualization---------------------------------------------- #| echo: false #| fig-width: 6 #| fig-height: 5.5 #| out-width: 100% #| label: fig-GasolineYield #| fig-cap: "Gasoline yield data from @betareg:Prater:1956: Proportion of crude oil converted to gasoline explained by temperature (in degrees Fahrenheit) at which all gasoline has vaporized and given batch (indicated by gray level). Fitted curves correspond to beta regressions `gy_loglog` with log-log link (solid, red) and `gy_logit` with logit link (dashed, blue). Both curves were evaluated at varying temperature with the intercept for batch 6 (i.e., roughly the average intercept)." redblue <- hcl(c(0, 260), 90, 40) plot(yield ~ temp, data = GasolineYield, type = "n", ylab = "Proportion of crude oil converted to gasoline", xlab = "Temperature at which all gasoline has vaporized", main = "Prater's gasoline yield data") points(yield ~ temp, data = GasolineYield, cex = 1.75, pch = 19, col = rev(gray.colors(10))[as.numeric(batch)]) points(yield ~ temp, data = GasolineYield, cex = 1.75) legend("topleft", as.character(1:10), title = "Batch", col = rev(gray.colors(10)), pch = 19, bty = "n") legend("topleft", as.character(1:10), title = "Batch", pch = 1, bty = "n") lines(150:500, predict(gy_logit, newdata = data.frame(temp = 150:500, batch = "6")), col = redblue[2], lwd = 2, lty = 2) lines(150:500, predict(gy_loglog, newdata = data.frame(temp = 150:500, batch = "6")), col = redblue[1], lwd = 2) legend("bottomright", c("log-log", "logit"), col = redblue, lty = 1:2, lwd = 2, bty = "n") ## ----GasolineYield-plot------------------------------------------------------- #| fig-width: 8.5 #| fig-height: 10 #| out-width: 100% #| label: fig-GasolineYield-plot #| fig-cap: "Diagnostic plots for beta regression model `gy_logit`." par(mfrow = c(3, 2)) suppressWarnings(RNGversion("3.5.0")) set.seed(123) plot(gy_logit, which = 1:4, type = "pearson") plot(gy_logit, which = 5, type = "deviance", sub.caption = "") plot(gy_logit, which = 1, type = "deviance", sub.caption = "") ## ----GasolineYield-update----------------------------------------------------- gy_logit4 <- update(gy_logit, subset = -4) coef(gy_logit, model = "precision") coef(gy_logit4, model = "precision") ## ----FoodExpenditure-lm------------------------------------------------------- data("FoodExpenditure", package = "betareg") fe_lm <- lm(I(food/income) ~ income + persons, data = FoodExpenditure) ## ----FoodExpenditure-bptest--------------------------------------------------- library("lmtest") bptest(fe_lm) ## ----FoodExpenditure-betareg-------------------------------------------------- fe_beta <- betareg(I(food/income) ~ income + persons, data = FoodExpenditure) summary(fe_beta) ## ----FoodExpenditure-visualization-------------------------------------------- #| echo: false #| fig-width: 6 #| fig-height: 5.5 #| out-width: 100% #| label: fig-FoodExpenditure #| fig-cap: "Household food expenditure data from @betareg:Griffiths+Hill+Judge:1993: Proportion of household income spent on food explained by household income and number of persons in household (indicated by gray level). Fitted curves correspond to beta regressions `fe_beta` with fixed dispersion (long-dashed, blue), `fe_beta2` with variable dispersion (solid, red), and the linear regression `fe_lin` (dashed, black). All curves were evaluated at varying income with the intercept for mean number of persons ($ = `r round(mean(FoodExpenditure$persons), digits = 2)`$)." redblueblack <- hcl(c(0, 260, 0), c(90, 90, 0), c(40, 40, 0)) plot(I(food/income) ~ income, data = FoodExpenditure, xlab = "Household income", ylab = "Proportion of food expenditures", main = "Food expenditures data", type = "n", ylim = c(0.04, 0.57)) points(I(food/income) ~ income, data = FoodExpenditure, cex = 1.75, pch = 19, col = rev(gray.colors(7))[persons]) points(I(food/income) ~ income, data = FoodExpenditure, cex = 1.75) legend("bottomleft", rev(as.character(sort(unique(FoodExpenditure$persons)))), title = "Persons", col = gray.colors(7), pch = 19, bty = "n") legend("bottomleft", rev(as.character(sort(unique(FoodExpenditure$persons)))), title = "Persons", pch = 1, bty = "n") lines(10:100, predict(fe_lm, newdata = data.frame(income = 10:100, persons = mean(FoodExpenditure$persons))), col = redblueblack[3], lwd = 2, lty = 2) lines(10:100, predict(fe_beta, newdata = data.frame(income = 10:100, persons = mean(FoodExpenditure$persons))), col = redblueblack[2], lwd = 2, lty = 5) lines(10:100, predict(fe_beta2, newdata = data.frame(income = 10:100, persons = mean(FoodExpenditure$persons))), col = redblueblack[1], lwd = 2) legend("topright", c("logit, var. disp.", "logit, fix. disp.", "lm"), col = redblueblack, lty = c(1, 5, 2), lwd = 2, bty = "n") ## ----GasolineYield-phireg----------------------------------------------------- gy_logit2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield) ## ----GasolineYield-phireg-coef, echo=FALSE------------------------------------ printCoefmat(summary(gy_logit2)$coefficients$precision) ## ----GasolineYield-lrtest----------------------------------------------------- lrtest(gy_logit, gy_logit2) ## ----FoodExpenditure-betareg2------------------------------------------------- fe_beta2 <- betareg(I(food/income) ~ income + persons | persons, data = FoodExpenditure) ## ----FoodExpenditure-comparison----------------------------------------------- lrtest(fe_beta, fe_beta2) AIC(fe_beta, fe_beta2, k = log(nrow(FoodExpenditure))) ## ----GasolineYield-loglog----------------------------------------------------- gy_loglog <- betareg(yield ~ batch + temp, data = GasolineYield, link = "loglog") ## ----GasolineYield-Rsquared--------------------------------------------------- summary(gy_logit)$pseudo.r.squared summary(gy_loglog)$pseudo.r.squared ## ----GasolineYield-AIC-------------------------------------------------------- AIC(gy_logit, gy_logit2, gy_loglog) ## ----GasolineYield-reset------------------------------------------------------ lrtest(gy_logit, . ~ . + I(predict(gy_logit, type = "link")^2)) lrtest(gy_loglog, . ~ . + I(predict(gy_loglog, type = "link")^2)) ## ----GasolineYield-diagnostics------------------------------------------------ #| echo: false #| fig-width: 6 #| fig-height: 5.5 #| out-width: 100% #| label: fig-GasolineYield-diagnostics #| fig-cap: "Scatterplot comparing the absolute raw residuals from beta regression modes with log-log link (x-axis) and logit link (y-axis)." plot(abs(residuals(gy_loglog, type = "response")), abs(residuals(gy_logit, type = "response"))) abline(0, 1, lty = 2) ## ----GasolineYield-loglog2---------------------------------------------------- gy_loglog2 <- update(gy_loglog, link.phi = "log") summary(gy_loglog2)$iterations ## ----FoodExpenditure-links---------------------------------------------------- sapply(c("logit", "probit", "cloglog", "cauchit", "loglog"), function(x) logLik(update(fe_beta2, link = x))) ## ----ReadingSkills-eda, echo=FALSE, results='hide'---------------------------- data("ReadingSkills", package = "betareg") rs_accuracy <- format(round(with(ReadingSkills, tapply(accuracy, dyslexia, mean)), digits = 3)) ## ----ReadingSkills-ols-------------------------------------------------------- data("ReadingSkills", package = "betareg") rs_ols <- lm(qlogis(accuracy) ~ dyslexia * iq, data = ReadingSkills) coeftest(rs_ols) ## ----ReadingSkills-beta------------------------------------------------------- rs_beta <- betareg(accuracy ~ dyslexia * iq | dyslexia + iq, data = ReadingSkills, hessian = TRUE) coeftest(rs_beta) ## ----ReadingSkills-visualization---------------------------------------------- #| echo: false #| fig-width: 6 #| fig.height: 5.5 #| out-width: 100% #| label: fig-ReadingSkills #| fig-cap: "Reading skills data from @betareg:Smithson+Verkuilen:2006 : Linearly transformed reading accuracy by IQ score and dyslexia status (control, blue vs. dyslexic, red). Fitted curves correspond to beta regression `rs_beta` (solid) and OLS regression with logit-transformed dependent variable `rs_ols` (dashed)." cl1 <- hcl(c(260, 0), 90, 40) cl2 <- hcl(c(260, 0), 10, 95) plot(accuracy ~ iq, data = ReadingSkills, col = cl2[as.numeric(dyslexia)], main = "Reading skills data", xlab = "IQ score", ylab = "Reading accuracy", pch = c(19, 17)[as.numeric(dyslexia)], cex = 1.5) points(accuracy ~ iq, data = ReadingSkills, cex = 1.5, pch = (1:2)[as.numeric(dyslexia)], col = cl1[as.numeric(dyslexia)]) nd <- data.frame(dyslexia = "no", iq = -30:30/10) lines(nd$iq, predict(rs_beta, nd), col = cl1[1], lwd = 2) lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[1], lty = 2, lwd = 2) nd <- data.frame(dyslexia = "yes", iq = -30:30/10) lines(nd$iq, predict(rs_beta, nd), col = cl1[2], lwd = 2) lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[2], lty = 2, lwd = 2) legend("topleft", c("control", "dyslexic", "betareg", "lm"), lty = c(NA, NA, 1:2), pch = c(19, 17, NA, NA), lwd = 2, col = c(cl2, 1, 1), bty = "n") legend("topleft", c("control", "dyslexic", "betareg", "lm"), lty = c(NA, NA, 1:2), pch = c(1, 2, NA, NA), col = c(cl1, NA, NA), bty = "n") ## ----strucchange-data--------------------------------------------------------- suppressWarnings(RNGversion("3.5.0")) set.seed(123) y1 <- c(rbeta(150, 0.3 * 4, 0.7 * 4), rbeta(50, 0.5 * 4, 0.5 * 4)) y2 <- c(rbeta(100, 0.3 * 4, 0.7 * 4), rbeta(100, 0.3 * 8, 0.7 * 8)) ## ----strucchange-gefp--------------------------------------------------------- library("strucchange") y1_gefp <- gefp(y1 ~ 1, fit = betareg) y2_gefp <- gefp(y2 ~ 1, fit = betareg) ## ----strucchange-plot1-------------------------------------------------------- #| fig-width: 6.5 #| fig-height: 6 #| label: fig-strucchange1 #| fig-cap: "Structural change tests for artificial data `y1` with change in $\\mu$." plot(y1_gefp, aggregate = FALSE) ## ----strucchange-plot2-------------------------------------------------------- #| fig-width: 6.5 #| fig-height: 6 #| label: fig-strucchange2 #| fig-cap: "Structural change tests for artificial data `y2` with change in $\\phi$." plot(y2_gefp, aggregate = FALSE)