R code followed by Stata code. Tested with R 4.3.1 and Stata 18.
URL <- 'https://raw.githubusercontent.com/clayford/dataviz_with_ggplot2/master/alb_homes.csv'
homes <- read.csv(file = URL)
import delimited "https://raw.githubusercontent.com/clayford/dataviz_with_ggplot2/master/alb_homes.csv"
head(homes)
list in 1/6
hist(homes$totalvalue)
histogram totalvalue
plot(density(homes$totalvalue))
kdensity totalvalue, kernel(gaussian)
kdensity totalvalue /*epanechnikov*/
m1 <- lm(totalvalue ~ finsqft + bedroom + lotsize, data = homes)
summary(m1)
regress totalvalue finsqft bedroom lotsize
coef(m1)
matrix list e(b)
plot(m1, which = 1)
predict resid, residuals
predict fitted, xb
rvfplot, addplot(lowess resid fitted, leg(off))
* list points of interest
list totalvalue fitted finsqft bedroom lotsize if resid > 2000000
plot(m1, which = 2)
qnorm resid /*calculated above*/
plot(m1, which = 3)
predict resid_z, rstandard
replace resid_z = sqrt(abs(resid_z))
twoway scatter resid_z fitted ||
lowess resid_z fitted
plot(m1, which = 5)
predict lev, leverage
predict resid_z2, rstandard
twoway scatter resid_z2 lev ||
lowess resid_z2 lev
/*without Cooks D contours*/
m2 <- lm(log(totalvalue) ~ finsqft + bedroom + lotsize, data = homes)
gen log_totalvalue = log(totalvalue )
regress log_totalvalue finsqft bedroom lotsize
m4 <- lm(log(totalvalue) ~ fullbath + finsqft + hsdistrict,
data = homes)
encode hsdistrict, generate(hsdistrict_f)
regress log_totalvalue fullbath finsqft i.hsdistrict_f
m6 <- lm(log(totalvalue) ~ fullbath + finsqft + hsdistrict +
fullbath:finsqft +
fullbath:hsdistrict +
finsqft:hsdistrict,
data = homes)
regress log_totalvalue fullbath finsqft i.hsdistrict_f
c.fullbath#c.finsqft
c.fullbath#i.hsdistrict_f
c.finsqft#i.hsdistrict_f
R using natural splines, Stata using restricted cubic splines
library(splines)
nlm3 <- lm(log(totalvalue) ~ ns(finsqft, 5) + lotsize + hsdistrict +
ns(finsqft, 5):hsdistrict,
data = homes)
makespline rcs finsqft, knots(7) basis(fsf) order(3) replace
regress log_totalvalue lotsize c.fsf*##hsdistrict_f
drop1(m6)
testparm c.finsqft#i.hsdistrict_f
testparm c.fullbath#hsdistrict_f
testparm c.fullbath#c.finsqft
library(ggeffects)
plot(ggpredict(m6, terms = c("fullbath[1:5]", "hsdistrict")))
margins i.hsdistrict_f, at( (median) finsqft fullbath=(1(1)5))
expression(exp(predict(xb)))
marginsplot
plot(ggpredict(m6, terms = c("finsqft[1000:4000 by=500]",
"fullbath[2:5]")))
margins, at(finsqft=(1000(500)4000) fullbath=(2(1)5) hsdistrict_f=1)
expression(exp(predict(xb)))
marginsplot
m4 <- lm(log(totalvalue) ~ finsqft + bedroom + lotsize,
data = homes)
AIC(m4); BIC(m4)
regress log_totalvalue finsqft bedroom lotsize
estimates stats
m1 <- lm(totalvalue ~ finsqft + bedroom + lotsize, data = homes)
m2 <- lm(totalvalue ~ finsqft + bedroom, data = homes)
AIC(m1, m2)
BIC(m1, m2)
regress totalvalue finsqft bedroom lotsize
estimates store m1
regress totalvalue finsqft bedroom
estimates store m2
estimates table m1 m2, stats(aic bic)
for
loopTo iterate through a sequence of values:
for (i in 1:nrow(homes)) {
cat('High school district for home', i, 'is', homes$hsdistrict[i], '\n')
}
local N = _N
forval i = 1 / `N' {
display "High school district for home", `i', "is", hsdistrict[`i']
}
To iterate through a set of tokens:
sch <- c('esdistrict', 'msdistrict', 'hsdistrict')
for (i in sch) {
print(table(homes[[i]]))
}
local sch esdistrict msdistrict hsdistrict
foreach i in `sch' {
tab `i'
}
Example uses rnes96
data from Extending the Linear
Model by Julian Faraway. Data available here.
library(mice)
md.pattern(rnes96)
imp <- mice(rnes96, m = 10, print = FALSE, seed = 99)
fit <- with(imp, multinom(party ~ age + education + income))
summary(pool(fit))
use rnes96.dta
mi set flong
mi misstable patterns
mi register imputed education income
mi impute chained (pmm, knn(5)) education income = party age, add(10)
mi estimate: mlogit party age i.education income