-
Notifications
You must be signed in to change notification settings - Fork 0
/
IV_Example.R
67 lines (48 loc) · 2.41 KB
/
IV_Example.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
### IV example using dinkelman 2011 paper
### goal: find effect of having electricity on proportion of women employed
# load in data
library(haven)
df <- read_dta("DATA/dinkelman_aer2011_Econ587.dta")
# set up controls that will be used throughout the analysis
control = ~ kms_to_subs0+ baseline_hhdens0+ base_hhpovrate0+ prop_head_f_a0+
sexratio0_a+ prop_indianwhite0+ kms_to_road0+ kms_to_town0+ prop_matric_m0+
prop_matric_f0+ d_prop_waterclose+ d_prop_flush+ idcc1+idcc2+ idcc3+
idcc4+ idcc5+ idcc6+ idcc7+ idcc8+ idcc9
part_a = update(control, d_prop_emp_f ~ . + T)
# regression with clustered standard errors as baseline
library(estimatr)
reg_a<- lm_robust(part_a, data = df, se_type = "stata", cluster = placecode0)
summary(reg_a)
# Treatment regression
part_b = update(control, T ~ . + mean_grad_new)
reg_b<- lm_robust(part_b, data = df, se_type = "stata", cluster = placecode0)
summary(reg_b)
# F-test robust to heteroskedasticity
car::linearHypothesis(reg_b, "mean_grad_new=0")
# F-test done by hand just because why not
F_by_hand = (reg_b[["statistic"]][["mean_grad_new"]])^2
# get predicitions for T based off of regression part_b
get_T_hat = lm(part_b, data = df)
df$T_hat = predict(get_T_hat)
# using T hat instead of T now...
part_e = update(control, d_prop_emp_f ~ . + T_hat)
reg_e = lm_robust(part_e, data = df, se_type = "stata", cluster = placecode0)
summary(reg_e)
# big long IV regression
library(ivreg)
ivmodel=ivreg(d_prop_emp_f ~ kms_to_subs0+ baseline_hhdens0+ base_hhpovrate0+ prop_head_f_a0+
sexratio0_a+ prop_indianwhite0+ kms_to_road0+ kms_to_town0+ prop_matric_m0+
prop_matric_f0+ d_prop_waterclose+ d_prop_flush+ idcc1+idcc2+ idcc3+
idcc4+ idcc5+ idcc6+ idcc7+ idcc8+ idcc9+ T
| kms_to_subs0+ baseline_hhdens0+ base_hhpovrate0+ prop_head_f_a0+
sexratio0_a+ prop_indianwhite0+ kms_to_road0+ kms_to_town0+ prop_matric_m0+
prop_matric_f0+ d_prop_waterclose+ d_prop_flush+ idcc1+idcc2+ idcc3+idcc4+
idcc5+ idcc6+ idcc7+ idcc8+ idcc9 + mean_grad_new,
data=df)
# reduced form regression for use in Wald test
part_h = update(control, d_prop_emp_f ~ . + mean_grad_new)
reg_h<- lm_robust(part_h, data = df, se_type = "stata", cluster = placecode0)
summary(reg_h)
# Wald test
Wald = reg_h[["coefficients"]][["mean_grad_new"]] / reg_b[["coefficients"]][["mean_grad_new"]]
summary(second_stage)