Lab 4

Chilean 1988 Survey Data

# load data
chile <- carData::Chile

# summary
summary(chile)

##  region     population     sex           age        education  
##  C :600   Min.   :  3750   F:1379   Min.   :18.00   P   :1107  
##  M :100   1st Qu.: 25000   M:1321   1st Qu.:26.00   PS  : 462  
##  N :322   Median :175000            Median :36.00   S   :1120  
##  S :718   Mean   :152222            Mean   :38.55   NA's:  11  
##  SA:960   3rd Qu.:250000            3rd Qu.:49.00              
##           Max.   :250000            Max.   :70.00              
##                                     NA's   :1                  
##      income         statusquo          vote    
##  Min.   :  2500   Min.   :-1.80301   A   :187  
##  1st Qu.:  7500   1st Qu.:-1.00223   N   :889  
##  Median : 15000   Median :-0.04558   U   :588  
##  Mean   : 33876   Mean   : 0.00000   Y   :868  
##  3rd Qu.: 35000   3rd Qu.: 0.96857   NA's:168  
##  Max.   :200000   Max.   : 2.04859             
##  NA's   :98       NA's   :17

Estimate model of support for SQ on age, population, income

m1 <- lm(statusquo ~ age + income + population, chile)
summary(m1)

## 
## Call:
## lm(formula = statusquo ~ age + income + population, data = chile)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85884 -0.89881 -0.06414  0.89874  1.91793 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.895e-02  6.139e-02  -1.123    0.262    
## age          8.206e-03  1.298e-03   6.323 3.02e-10 ***
## income       2.336e-06  4.943e-07   4.727 2.40e-06 ***
## population  -2.212e-06  1.914e-07 -11.558  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9696 on 2586 degrees of freedom
##   (110 observations deleted due to missingness)
## Multiple R-squared:  0.06304,    Adjusted R-squared:  0.06195 
## F-statistic: 57.99 on 3 and 2586 DF,  p-value: < 2.2e-16

These coefficients are really hard to interpret because of scaling issues. Let’s rescale to something more reasonable.

# rescale vars
chile$age_10 <- chile$age / 10 # age in decades
chile$income_10K <- chile$income / 10000 # income in 10 thousands
chile$population_10K <- chile$population / 10000 # population in 10 thousands

# reestimate model
m1 <- lm(statusquo ~ age_10 + income_10K + population_10K, chile)
summary(m1)

## 
## Call:
## lm(formula = statusquo ~ age_10 + income_10K + population_10K, 
##     data = chile)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85884 -0.89881 -0.06414  0.89874  1.91793 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -0.068950   0.061394  -1.123    0.262    
## age_10          0.082057   0.012978   6.323 3.02e-10 ***
## income_10K      0.023362   0.004943   4.727 2.40e-06 ***
## population_10K -0.022118   0.001914 -11.558  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9696 on 2586 degrees of freedom
##   (110 observations deleted due to missingness)
## Multiple R-squared:  0.06304,    Adjusted R-squared:  0.06195 
## F-statistic: 57.99 on 3 and 2586 DF,  p-value: < 2.2e-16

Reproduce sig tests by hand

# store betas
B <- coef(m1)

# calculate standard errors
se <- sqrt(diag(vcov(m1)))

# calculate t stats
t <- B / se

# calculate p values for 2-tailed
p <- ( 1 - pt(abs(t), df = m1$df.residual) ) * 2

# report neatly
round(cbind(Beta = B, se = se, t = t, p = p), 4)

##                   Beta     se        t      p
## (Intercept)    -0.0689 0.0614  -1.1231 0.2615
## age_10          0.0821 0.0130   6.3228 0.0000
## income_10K      0.0234 0.0049   4.7267 0.0000
## population_10K -0.0221 0.0019 -11.5580 0.0000

Confidence intervals

# using confint
confint(m1)

##                      2.5 %      97.5 %
## (Intercept)    -0.18933668  0.05143765
## age_10          0.05660881  0.10750528
## income_10K      0.01367032  0.03305403
## population_10K -0.02587082 -0.01836584

# reproduce by hand
cbind(
  B + qt(0.025, df = m1$df.residual) * se,
  B + qt(0.975, df = m1$df.residual) * se
)

##                       [,1]        [,2]
## (Intercept)    -0.18933668  0.05143765
## age_10          0.05660881  0.10750528
## income_10K      0.01367032  0.03305403
## population_10K -0.02587082 -0.01836584

# get fancy (and efficient) using apply
CIs <- sapply(c(0.025, 0.20, 0.80, 0.975), 
              function(x) B + qt(x, df = m1$df.residual) * se)
CIs

##                       [,1]        [,2]        [,3]        [,4]
## (Intercept)    -0.18933668 -0.12062888 -0.01727015  0.05143765
## age_10          0.05660881  0.07113272  0.09298137  0.10750528
## income_10K      0.01367032  0.01920169  0.02752266  0.03305403
## population_10K -0.02587082 -0.02372919 -0.02050747 -0.01836584

# plot with "visually weighted" CIs
par(mar=c(5,4,1,1))
plot(1:3, B[2:4], pch=19, xlab="", ylab="marginal effect", 
     xlim=c(0.75, 3.25), ylim=c(-0.05, 0.15), axes=F)
axis(1, at=1:3, labels = c("Age (decades)", "Income (10Ks)", "Pop. (10Ks)"))
axis(2, at=seq(-0.05, 0.15, 0.05))
segments(1:3, CIs[2:4, 1], 1:3, CIs[2:4, 4], lwd=1, lty=1)
segments(1:3, CIs[2:4, 2], 1:3, CIs[2:4, 3], lwd=3, lty=1)
abline(h=0, lty=3)
box()

# make table
m1_tab <- cbind(
  Var = c("Intercept","Age (decades)", "Income (10Ks)", "Pop. (10Ks)"),
  B = round(coef(m1), 3),
  se = round(sqrt(diag(vcov(m1))), 3),
  round(confint(m1), 3)
)

# packages needed for what is below
library(kableExtra)
library(dplyr)
library(knitr)

# create and print table
kable(m1_tab, row.names = F) %>%
  kable_classic(full_width = T, html_font = "Cambria") %>%
  add_footnote(
    c(
      "------",
      paste0("N = ", nrow(m1$model)), 
      paste0("adj. R2 = ", round(summary(m1)$adj.r.squared, 3)), 
      "Notes: DV is support for status quo, in SD units"
    ), 
    notation = "none"
  )

Var	B	se	2.5 %	97.5 %
Intercept	-0.069	0.061	-0.189	0.051
Age (decades)	0.082	0.013	0.057	0.108
Income (10Ks)	0.023	0.005	0.014	0.033
Pop. (10Ks)	-0.022	0.002	-0.026	-0.018
——
N = 2590
adj. R2 = 0.062
Notes: DV is support for status quo, in SD units

Using `linearHypothesis` in `car` for tests

car::linearHypothesis(m1, "age_10 = 0")

## 
## Linear hypothesis test:
## age_10 = 0
## 
## Model 1: restricted model
## Model 2: statusquo ~ age_10 + income_10K + population_10K
## 
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1   2587 2469.0                                  
## 2   2586 2431.4  1    37.588 39.978 3.016e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

car::linearHypothesis(m1, "age_10 = 0.05")

## 
## Linear hypothesis test:
## age_10 = 0.05
## 
## Model 1: restricted model
## Model 2: statusquo ~ age_10 + income_10K + population_10K
## 
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1   2587 2437.1                              
## 2   2586 2431.4  1    5.7367 6.1015 0.01357 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

car::linearHypothesis(m1, c("age_10 - income_10K = 0"))

## 
## Linear hypothesis test:
## age_10 - income_10K = 0
## 
## Model 1: restricted model
## Model 2: statusquo ~ age_10 + income_10K + population_10K
## 
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1   2587 2448.6                                  
## 2   2586 2431.4  1    17.233 18.328 1.927e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(lm(statusquo ~ age_10 + I(age_10 + income_10K) + population_10K, chile))

## 
## Call:
## lm(formula = statusquo ~ age_10 + I(age_10 + income_10K) + population_10K, 
##     data = chile)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85884 -0.89881 -0.06414  0.89874  1.91793 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -0.068950   0.061394  -1.123    0.262    
## age_10                  0.058695   0.013710   4.281 1.93e-05 ***
## I(age_10 + income_10K)  0.023362   0.004943   4.727 2.40e-06 ***
## population_10K         -0.022118   0.001914 -11.558  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9696 on 2586 degrees of freedom
##   (110 observations deleted due to missingness)
## Multiple R-squared:  0.06304,    Adjusted R-squared:  0.06195 
## F-statistic: 57.99 on 3 and 2586 DF,  p-value: < 2.2e-16

Lab 4

Chilean 1988 Survey Data

Estimate model of support for SQ on age, population, income

Reproduce sig tests by hand

Confidence intervals

Using linearHypothesis in car for tests

Using `linearHypothesis` in `car` for tests