R code followed by Stata code. Tested with R 4.3.1 and Stata 18.

import CSV from URL

URL <- 'https://raw.githubusercontent.com/clayford/dataviz_with_ggplot2/master/alb_homes.csv'
homes <- read.csv(file = URL)
import delimited "https://raw.githubusercontent.com/clayford/dataviz_with_ggplot2/master/alb_homes.csv"

histogram

hist(homes$totalvalue)
histogram totalvalue

density histogram

plot(density(homes$totalvalue))
kdensity totalvalue, kernel(gaussian)
kdensity totalvalue /*epanechnikov*/

linear model

m1 <- lm(totalvalue ~ finsqft + bedroom + lotsize, data = homes)
summary(m1)
regress totalvalue finsqft bedroom lotsize

extract coefficients

coef(m1)
matrix list e(b)

residuals versus fitted values

plot(m1, which = 1)
predict resid, residuals
predict fitted, xb
rvfplot, addplot(lowess resid fitted, leg(off))
* list points of interest
list totalvalue fitted finsqft bedroom lotsize if resid > 2000000

QQ plot of residuals

plot(m1, which = 2)
qnorm resid /*calculated above*/

scale-location plot

plot(m1, which = 3)
predict resid_z, rstandard
replace resid_z = sqrt(abs(resid_z))
twoway scatter resid_z fitted || 
       lowess resid_z fitted 

residuals vs leverage

plot(m1, which = 5)
predict lev, leverage
predict resid_z2, rstandard
twoway scatter resid_z2 lev || 
       lowess resid_z2 lev
/*without Cooks D contours*/

model with log-transformed response

m2 <- lm(log(totalvalue) ~ finsqft + bedroom + lotsize, data = homes)
gen log_totalvalue = log(totalvalue )
regress log_totalvalue finsqft bedroom lotsize

model with categorical predictors

m4 <- lm(log(totalvalue) ~ fullbath + finsqft + hsdistrict, 
         data = homes)
encode hsdistrict, generate(hsdistrict_f)
regress log_totalvalue fullbath finsqft i.hsdistrict_f

model with interactions

m6 <- lm(log(totalvalue) ~ fullbath + finsqft + hsdistrict + 
           fullbath:finsqft + 
           fullbath:hsdistrict + 
           finsqft:hsdistrict, 
           data = homes)
regress log_totalvalue fullbath finsqft i.hsdistrict_f  
   c.fullbath#c.finsqft 
   c.fullbath#i.hsdistrict_f 
   c.finsqft#i.hsdistrict_f

model with non-linear effects

R using natural splines, Stata using restricted cubic splines

library(splines)
nlm3 <- lm(log(totalvalue) ~ ns(finsqft, 5) + lotsize + hsdistrict +
           ns(finsqft, 5):hsdistrict, 
         data = homes)
makespline rcs finsqft, knots(7) basis(fsf) order(3) replace
regress log_totalvalue lotsize c.fsf*##hsdistrict_f

partial F tests (Type II SS)

drop1(m6)
testparm c.finsqft#i.hsdistrict_f 
testparm c.fullbath#hsdistrict_f
testparm c.fullbath#c.finsqft

effect plot (continuous/categorical interaction)

library(ggeffects)
plot(ggpredict(m6, terms = c("fullbath[1:5]", "hsdistrict")))
margins i.hsdistrict_f,  at( (median) finsqft fullbath=(1(1)5)) 
        expression(exp(predict(xb)))
marginsplot

effect plot (continuous/continuous interaction)

plot(ggpredict(m6, terms = c("finsqft[1000:4000 by=500]", 
                             "fullbath[2:5]")))
margins,  at(finsqft=(1000(500)4000) fullbath=(2(1)5) hsdistrict_f=1)
             expression(exp(predict(xb)))
marginsplot

AIC/BIC values

m4 <- lm(log(totalvalue) ~ finsqft + bedroom + lotsize, 
         data = homes)
AIC(m4); BIC(m4)

regress log_totalvalue finsqft bedroom lotsize
estimates stats

AIC/BIC values for model comparison

m1 <- lm(totalvalue ~ finsqft + bedroom + lotsize, data = homes)
m2 <- lm(totalvalue ~ finsqft + bedroom, data = homes)
AIC(m1, m2)
BIC(m1, m2)
regress totalvalue finsqft bedroom lotsize
estimates store m1
regress totalvalue finsqft bedroom
estimates store m2
estimates table m1 m2, stats(aic bic)

for loop

To iterate through a sequence of values:

for (i in 1:nrow(homes)) {
  cat('High school district for home', i, 'is', homes$hsdistrict[i], '\n')
}
local N = _N
forval i = 1 / `N' {
    display "High school district for home", `i', "is", hsdistrict[`i']
}

To iterate through a set of tokens:

sch <- c('esdistrict', 'msdistrict', 'hsdistrict')
for (i in sch) {
  print(table(homes[[i]]))
}
local sch esdistrict msdistrict hsdistrict
foreach i in `sch' {
    tab `i'
}

multiple imputation

Example uses rnes96 data from Extending the Linear Model by Julian Faraway. Data available here.

library(mice)
md.pattern(rnes96)
imp <- mice(rnes96, m = 10, print = FALSE, seed = 99)
fit <- with(imp, multinom(party ~ age + education + income))
summary(pool(fit))
use rnes96.dta
mi set flong
mi misstable patterns
mi register imputed education income
mi impute chained (pmm, knn(5)) education income = party age, add(10)
mi estimate: mlogit party age i.education income