-
Notifications
You must be signed in to change notification settings - Fork 0
/
answers.txt
142 lines (99 loc) · 4.71 KB
/
answers.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# YOUR TURN #1 ------------------------------------------------------------
# Simulate data from a known linear model, then check how closely lm()
# recovers the coefficients that generated it.

# submit the following code to simulate some data
set.seed(2)  # fixed seed so the simulated draws are reproducible
n_obs <- 1000
true_coefs <- c(intercept = 5, x1 = 10, x2 = -4)

# The order of the random draws (x1, then x2, then noise) determines the
# simulated values for this seed, so it is kept as-is.
x1 <- sample(1:5, size = n_obs, replace = TRUE,
             prob = c(0.1, 0.2, 0.3, 0.3, 0.1))
x2 <- rnorm(n = n_obs, mean = 12, sd = 2)
noise <- rnorm(n = n_obs, mean = 0, sd = 4)
y <- true_coefs[["intercept"]] + true_coefs[["x1"]] * x1 +
  true_coefs[["x2"]] * x2 + noise
df <- data.frame(y, x1, x2)

# Use lm() in attempt to recover the "true" values.
# Fit against df (not the loose global vectors) so the model is tied to the
# data frame that was built for it.
m <- lm(y ~ x1 + x2, data = df)
summary(m)

# Side-by-side view of the generating coefficients and the estimates.
cbind(truth = true_coefs, estimate = coef(m))
# YOUR TURN #2 ------------------------------------------------------------
# Add bathrooms and garage size to the 2nd model we fit:
# sales_mod <- lm(log(price) ~ finsqft + bedrooms + lotsize, data = sales)
m2 <- lm(
  log(price) ~ finsqft + bedrooms + lotsize + bathrooms + garagesize,
  data = sales
)
summary(m2)

# check the diagnostic plots
plot(m2)

# What does the garagesize coefficient say?
# The response is log(price), so the coefficients are exponentiated to read
# them as multiplicative effects on price.
round(exp(coef(m2)), 3)
# Each additional car space increases price by about 12%

# Same transformation applied to the confidence limits.
round(exp(confint(m2)), 3)
# Or each additional car space increases price by at least 8%

# Challenge: simulate data from the model and compare to the observed price
sim.price <- simulate(m2, nsim = 50)
# Observed density first, then one dashed grey curve per simulated response.
plot(density(log(sales$price)))
for (sim_draw in sim.price) {
  lines(density(sim_draw), lty = 2, col = "grey80")
}
# YOUR TURN #3 ------------------------------------------------------------
# Add highway to the following model and fit it.
# lm(log(price) ~ finsqft + bedrooms + lotsize + bathrooms + garagesize + quality,
# data = sales)
m3 <- lm(
  log(price) ~ finsqft + bedrooms + lotsize + bathrooms + garagesize +
    quality + highway,
  data = sales
)

# What is the interpretation of highway? How does it relate to the expected
# price? Exponentiate the coefficients because the response is log(price).
round(exp(coef(m3)), 3)
# Being next to a highway appears to decrease price by about 5%
# YOUR TURN #4 ------------------------------------------------------------
# Add an interaction for lotsize and quality to the following model:
# lm(log(price) ~ finsqft + bedrooms + lotsize + bathrooms + garagesize +
# quality + highway, data = sales)
m4 <- lm(
  log(price) ~ finsqft + bedrooms + lotsize + bathrooms + garagesize +
    quality + highway + lotsize:quality,
  data = sales
)
summary(m4)

# Try creating an effect plot; use the code above as a template.
# Predictions over lotsize, grouped by quality, then plotted.
m4_effects <- ggpredict(m4, terms = c("lotsize", "quality"))
plot(m4_effects)
# YOUR TURN #5 ------------------------------------------------------------
# Fit a non-linear effect for bedrooms using a natural spline with 3 DF.
# lm(log(price) ~ finsqft + bedrooms + lotsize + bathrooms + garagesize +
# quality + highway,
# data = sales)
m5 <- lm(
  log(price) ~ finsqft + ns(bedrooms, df = 3) + lotsize + bathrooms +
    garagesize + quality + highway,
  data = sales
)
summary(m5)

# generate an effect for the non-linear bedrooms effect
bedroom_effect <- ggpredict(m5, terms = "bedrooms[n = 20]")
plot(bedroom_effect)

# How does the crPlot look?
# Restrict the component+residual plot to the spline term only.
crPlots(m5, terms = ~ ns(bedrooms, df = 3))
# YOUR TURN #6 ------------------------------------------------------------
# Compare these two models. The second contains a complex interaction between
# finsqft and bedrooms.
home_mod1 <- lm(
  log(price) ~ ns(finsqft, 3) + ns(bedrooms, 3) + bathrooms + ac + pool +
    quality,
  data = sales
)
home_mod2 <- lm(
  log(price) ~ ns(finsqft, 3) * ns(bedrooms, 3) + bathrooms + ac + pool +
    quality,
  data = sales
)

# Nested-model F test and information criteria for the two fits.
anova(home_mod1, home_mod2)
AIC(home_mod1, home_mod2)

# Effect of finsqft at selected bedroom counts under the interaction model.
interaction_effect <- ggpredict(
  home_mod2,
  terms = c("finsqft", "bedrooms[1 ,3, 5]")
)
plot(interaction_effect)

# Simulation check for the interaction model: observed log(price) density
# with one grey curve per simulated response.
sim_mod2 <- simulate(home_mod2, nsim = 50)
plot(density(log(sales$price)))
for (sim_draw in sim_mod2) {
  lines(density(sim_draw), col = "grey80")
}

# Same check for the additive model (simulated after sim_mod2, as before).
sim_mod1 <- simulate(home_mod1, nsim = 50)
plot(density(log(sales$price)))
for (sim_draw in sim_mod1) {
  lines(density(sim_draw), col = "grey80")
}
# YOUR TURN #7 ------------------------------------------------------------
# Modify the code below to produce an effect of plot for quality with lotsize
# set to 20000.

# Template: predictions over lotsize, with the other listed predictors held
# at the values given in `condition`.
eff_out <- ggpredict(
  sales_mod7,
  terms = "lotsize",
  condition = c(
    finsqft = 2500, bedrooms = 4, quality = "medium", bathrooms = 2,
    garagesize = 2, pool = "no", ac = "yes"
  )
)
plot(eff_out)

# Answer: predictions over quality instead, with lotsize fixed at 20000 and
# the remaining conditions unchanged.
eff_out <- ggpredict(
  sales_mod7,
  terms = "quality",
  condition = c(
    finsqft = 2500, bedrooms = 4, lotsize = 20000, bathrooms = 2,
    garagesize = 2, pool = "no", ac = "yes"
  )
)
plot(eff_out)