-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
105 lines (90 loc) · 3.92 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Statistic options handed to `revscoring cv_test` by the model rules below.
# One -s option per line; backslash-newlines collapse to single spaces, so the
# expanded value is a single flat argument list.
test_statistics = \
  -s 'table' \
  -s 'accuracy' \
  -s 'precision' \
  -s 'recall' \
  -s 'pr' \
  -s 'roc' \
  -s 'recall_at_fpr(max_fpr=0.10)' \
  -s 'filter_rate_at_recall(min_recall=0.90)' \
  -s 'filter_rate_at_recall(min_recall=0.75)'
##### Models #################################################################
# Build the enwiki "attack" gradient-boosting model.
# The cached feature set ($<) is decompressed and piped into
# `revscoring cv_test`; its stdout becomes the model file.
# Output goes to $@.tmp first and is renamed onto $@ only when revscoring
# exits successfully, so a failed run cannot leave a truncated model that
# Make would consider up to date.
models/enwiki.attack.gradient_boosting.model: \
    datasets/detox_labels.mturk.dev_train.w_cache.json.bz2
	mkdir -p $(@D)
	bzcat $< | \
	  revscoring cv_test \
	    revscoring.scorer_models.GradientBoosting \
	    commquality.feature_lists.enwiki.attack \
	    attack \
	    --version=0.0.1 \
	    -p 'max_depth=7' \
	    -p 'learning_rate=0.01' \
	    -p 'max_features="log2"' \
	    -p 'n_estimators=700' \
	    $(test_statistics) \
	    --balance-sample-weight \
	    --center --scale > $@.tmp && \
	  mv -f $@.tmp $@
# Build the enwiki "aggression" gradient-boosting model.
# The cached feature set ($<) is decompressed and piped into
# `revscoring cv_test`; its stdout becomes the model file.
# Output goes to $@.tmp first and is renamed onto $@ only when revscoring
# exits successfully, so a failed run cannot leave a truncated model that
# Make would consider up to date.
models/enwiki.aggression.gradient_boosting.model: \
    datasets/detox_labels.mturk.dev_train.w_cache.json.bz2
	mkdir -p $(@D)
	bzcat $< | \
	  revscoring cv_test \
	    revscoring.scorer_models.GradientBoosting \
	    commquality.feature_lists.enwiki.aggression \
	    aggression \
	    --version=0.0.1 \
	    -p 'max_depth=7' \
	    -p 'learning_rate=0.01' \
	    -p 'max_features="log2"' \
	    -p 'n_estimators=700' \
	    $(test_statistics) \
	    --balance-sample-weight \
	    --center --scale > $@.tmp && \
	  mv -f $@.tmp $@
###### Feature sets ###########################################################
# Build the cached feature set used for model training.
# The dev and train label files ($^, in that order) are decompressed and
# concatenated, passed through `revscoring extract` for both the attack and
# aggression feature lists, and the result is bzip2-compressed into $@.
datasets/detox_labels.mturk.dev_train.w_cache.json.bz2: \
    datasets/detox_labels.mturk.dev.json.bz2 \
    datasets/detox_labels.mturk.train.json.bz2
	bzcat $^ | \
	  revscoring extract \
	    --host https://en.wikipedia.org \
	    commquality.feature_lists.enwiki.attack \
	    commquality.feature_lists.enwiki.aggression \
	    --verbose | \
	  bzip2 -c > $@
###### Datasets ###############################################################
# Convenience target grouping the three annotation datasets (dev, train, test).
# Declared phony because it names a group of files, not a file of its own; a
# stray file called "base_datasets" must not make this target look up to date.
.PHONY: base_datasets
base_datasets: \
    datasets/detox_annotations.mturk.dev.json.bz2 \
    datasets/detox_annotations.mturk.train.json.bz2 \
    datasets/detox_annotations.mturk.test.json.bz2
# Convenience target grouping the three aggregated label datasets
# (dev, train, test). Declared phony because it names a group of files, not a
# file of its own. The prerequisite list ends without a trailing backslash so
# that whatever follows this rule is never spliced into its header.
.PHONY: label_datasets
label_datasets: \
    datasets/detox_labels.mturk.dev.json.bz2 \
    datasets/detox_labels.mturk.train.json.bz2 \
    datasets/detox_labels.mturk.test.json.bz2
#datasets/detox_annotations.mturk.dev.json.bz2: \
# datasets/dirty_tsv/detox_annotations.mturk.dev.dirty_tsv.bz2
# bzcat datasets/dirty_tsv/detox_annotations.mturk.dev.dirty_tsv.bz2 | \
# python clean_detox_tsv.py | \
# bzip2 -c > datasets/detox_annotations.mturk.dev.json.bz2
#datasets/detox_annotations.mturk.train.json.bz2: \
# datasets/dirty_tsv/detox_annotations.mturk.train.dirty_tsv.bz2
# bzcat datasets/dirty_tsv/detox_annotations.mturk.train.dirty_tsv.bz2 | \
# python clean_detox_tsv.py | \
# bzip2 -c > datasets/detox_annotations.mturk.train.json.bz2
#datasets/detox_annotations.mturk.test.json.bz2: \
# datasets/dirty_tsv/detox_annotations.mturk.test.dirty_tsv.bz2
# bzcat datasets/dirty_tsv/detox_annotations.mturk.test.dirty_tsv.bz2 | \
# python clean_detox_tsv.py | \
# bzip2 -c > datasets/detox_annotations.mturk.test.json.bz2
# Aggregate the dev annotations into labels: decompress $<, run it through
# aggregate_annotations.py, and bzip2-compress the output into $@.
datasets/detox_labels.mturk.dev.json.bz2: \
    datasets/detox_annotations.mturk.dev.json.bz2
	bzcat $< | \
	  python aggregate_annotations.py | \
	  bzip2 -c > $@
# Aggregate the train annotations into labels: decompress $<, run it through
# aggregate_annotations.py, and bzip2-compress the output into $@.
datasets/detox_labels.mturk.train.json.bz2: \
    datasets/detox_annotations.mturk.train.json.bz2
	bzcat $< | \
	  python aggregate_annotations.py | \
	  bzip2 -c > $@
# Aggregate the test annotations into labels: decompress $<, run it through
# aggregate_annotations.py, and bzip2-compress the output into $@.
datasets/detox_labels.mturk.test.json.bz2: \
    datasets/detox_annotations.mturk.test.json.bz2
	bzcat $< | \
	  python aggregate_annotations.py | \
	  bzip2 -c > $@