-
Notifications
You must be signed in to change notification settings - Fork 4
/
notebook.r
155 lines (128 loc) · 4.94 KB
/
notebook.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Notebook setup ----
# Set a compact default size for the plots rendered in this notebook.
options(repr.plot.width = 5, repr.plot.height = 4)

# Attach the packages used below: tidyverse for reading, summarising and
# plotting the data; testthat and IRkernel.testthat for the checks that
# follow each step.
library(tidyverse)
library(testthat)
library(IRkernel.testthat)

# Check that the tidyverse packages the analysis relies on are attached.
run_tests({
  test_that("the packages are loaded", {
    required_pkgs <- c("package:ggplot2", "package:readr", "package:dplyr")
    expect_true(
      all(required_pkgs %in% search()),
      # The hint names library(), which is how packages are attached here.
      info = "The dplyr, readr and ggplot2 packages should be loaded using library()."
    )
  })
})
# Read the Candy Crush data set into `data`.
data <- read_csv("datasets/candy_crush.csv")

# Show the first few rows.
head(data)

# NOTE(review): tidyverse is already attached at the top of the file, so this
# repeated call changes nothing; it is kept rather than removed.
library(tidyverse)

# Check that `data` matches a fresh read of the same CSV file.
run_tests({
  test_that("data is read in correctly", {
    correct_data <- read_csv("datasets/candy_crush.csv")
    expect_equal(
      correct_data, data,
      info = "data should contain datasets/candy_crush.csv read in using read_csv"
    )
  })
})
# Count the distinct player ids in the data.
print("Number of players:")
data$player_id %>%
  unique() %>%
  length()

# Smallest and largest value of the dt column.
print("Period for which we have data:")
range(data[["dt"]])

# Placeholder check: this step has nothing to verify, so it always passes.
run_tests({
  test_that("nothing", {
    expect_true(TRUE, info = "")
  })
})
# Summarise each level: total attempts, total wins, and the share of
# attempts that were won (p_win). The result has one row per level.
difficulty <- data %>%
  group_by(level) %>%
  summarise(
    attempts = sum(num_attempts),
    wins = sum(num_success)
  ) %>%
  mutate(p_win = wins / attempts)

# Show the resulting table.
print(difficulty)

# Check p_win by recomputing the same summary from `data`.
run_tests({
  test_that("p_win is calculated correctly", {
    correct_difficulty <- data %>%
      group_by(level) %>%
      summarise(
        attempts = sum(num_attempts),
        wins = sum(num_success)
      ) %>%
      mutate(p_win = wins / attempts)
    expect_equal(
      correct_difficulty$p_win,
      difficulty$p_win,
      info = "difficulty$p_win should be estimated probability to pass each level in a single attempt"
    )
  })
})
# Plot the level difficulty profile: p_win against level as a line, with
# x-axis breaks at 1 through 15, a percent-formatted y-axis and a centred title.
ggplot(data = difficulty, aes(x = level, y = p_win)) +
  geom_line() +
  scale_x_continuous(breaks = 1:15) +
  # `labels` is spelled out in full; the abbreviated `label` only worked
  # through R's partial argument matching.
  scale_y_continuous(labels = scales::percent) +
  ylab("Probability of Winning Level") +
  ggtitle("Level Difficulty") +
  theme(plot.title = element_text(hjust = 0.5))

# Check that the most recent plot is a ggplot object.
run_tests({
  test_that("the student plotted a ggplot", {
    expect_true(
      # inherits() is the idiomatic class test, replacing `%in% class(x)`.
      inherits(last_plot(), "ggplot"),
      info = "You should plot difficulty using ggplot."
    )
  })
})
# Same difficulty profile as a line plot, now with a point at every level and
# a dashed red horizontal reference line at p_win = 0.1.
ggplot(data = difficulty, aes(x = level, y = p_win)) +
  geom_line() +
  scale_x_continuous(breaks = 1:15) +
  # `labels` is spelled out in full; the abbreviated `label` only worked
  # through R's partial argument matching.
  scale_y_continuous(labels = scales::percent) +
  ylab("Probability of Winning Level") +
  ggtitle("Level Difficulty") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point() +
  geom_hline(yintercept = 0.1, linetype = "dashed", color = "red")

# Check that the most recent plot has line, point and hline layers.
run_tests({
  # First class name of each layer's geom; vapply() guarantees a character
  # vector even when the plot has no layers.
  plot_layers <- vapply(
    last_plot()$layers,
    function(layer) class(layer$geom)[1],
    character(1)
  )
  test_that("the student has plotted lines, points and a hline", {
    expect_true(
      all(c("GeomLine", "GeomPoint", "GeomHline") %in% plot_layers),
      info = "The plot should include lines between the datapoints, points at the datapoints and a horizontal line."
    )
  })
})
# Add an `error` column holding the standard error of each level's p_win:
# sqrt(p_win * (1 - p_win) / attempts).
difficulty <- mutate(
  difficulty,
  error = sqrt(p_win * (1 - p_win) / attempts)
)

# Show the first few rows with the new column.
head(difficulty)

# Check the error column by recomputing it from difficulty's own p_win and
# attempts columns.
run_tests({
  test_that("error is correct", {
    correct_difficulty <- mutate(
      difficulty,
      error = sqrt(p_win * (1 - p_win) / attempts)
    )
    expect_equal(
      correct_difficulty$error,
      difficulty$error,
      info = "difficulty$error should be calculated as sqrt(p_win * (1 - p_win) / attempts)"
    )
  })
})
# Difficulty profile with points, a dashed horizontal reference line at
# p_win = 0.1, and red error bars spanning p_win - error to p_win + error.
ggplot(data = difficulty, aes(x = level, y = p_win)) +
  geom_line() +
  scale_x_continuous(breaks = 1:15) +
  # `labels` is spelled out in full; the abbreviated `label` only worked
  # through R's partial argument matching.
  scale_y_continuous(labels = scales::percent) +
  ylab("Probability of Winning Level") +
  ggtitle("Level Difficulty") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point() +
  geom_hline(yintercept = 0.1, linetype = "dashed") +
  geom_errorbar(
    aes(ymin = p_win - error, ymax = p_win + error),
    color = "red"
  )

# Check that the most recent plot has an error-bar layer.
run_tests({
  # First class name of each layer's geom; vapply() guarantees a character
  # vector even when the plot has no layers.
  plot_layers <- vapply(
    last_plot()$layers,
    function(layer) class(layer$geom)[1],
    character(1)
  )
  # The description and hint name error bars, which is what this check
  # verifies.
  test_that("the student has plotted error bars", {
    expect_true(
      "GeomErrorbar" %in% plot_layers,
      info = "The plot should include error bars using geom_errorbar."
    )
  })
})
# Probability of completing the episode without losing a single time,
# computed as the product of every level's p_win.
p <- prod(difficulty[["p_win"]])

# Show the value.
p

# Check p against the product of difficulty$p_win.
run_tests({
  test_that("p is correct", {
    correct_p <- prod(difficulty$p_win)
    expect_equal(
      correct_p,
      p,
      info = "p should be calculated as the product of difficulty$p_win ."
    )
  })
})
# Should our level designer worry that a lot of players will complete the
# episode in one attempt? Recorded as a logical flag; assigned with `<-`,
# the idiomatic R assignment operator.
should_the_designer_worry <- FALSE
# Check the recorded conclusion: the flag is expected to be FALSE.
run_tests({
  test_that(
    "should_the_designer_worry is FALSE",
    {
      expect_false(
        should_the_designer_worry,
        info = "The probability is really small, so I don't think the designer should worry that much..."
      )
    }
  )
})