#!/bin/bash
# Usage: ./eval STEP_SIZE EM_ITER OUTPUT [EN|SANS]
# where OUTPUT is the destination file for the scores,
# STEP_SIZE is the number of lines of extra unlabeled data added at each step
# (e.g. with STEP_SIZE=100 the tagger is run on 0,100,200,300,...,39672 lines),
# and EM_ITER is the number of iterations of EM to perform.
#
# Runs on English by default; pass SANS as the fourth argument for Sanskrit.
#
# Watch the tagger output in real-time with: tail -f debug/semisupervised_stdout.txt
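#
# Example invocation (the output filename here is only illustrative):
#   ./eval 100 1 en_scores.txt
# re-trains the semi-supervised English tagger every 100 lines of unlabeled data
# with 1 iteration of EM and appends one "<unlabeled lines> <score>" row to
# en_scores.txt per run.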
if [ $# -ne "3" ] && [ $# -ne "4" ]; then
    echo "usage: eval STEP_SIZE EM_ITER OUTPUT (EN|SANS)"
    echo "Evaluate accuracy of semi-supervised HMM as a function of unlabeled corpus size."
    echo ""
    echo "Arguments:"
    echo "  STEP_SIZE: Num. of lines data is increased by on each iteration."
    echo "  EM_ITER:   Num. of iterations of EM"
    echo "  OUTPUT:    Path to output table of corpus size vs. accuracy"
    echo "  (EN|SANS): Optional. Either EN (English) or SANS (Sanskrit). Defaults to EN."
    exit 1
fi
# parameters to this script:
step_size=$1
em_iter=$2
output=$3
OUTPUT_DIR="debug" # where should the output from this program go?
# parameters to the tagger:
TRAIN="data/en/wsj2-21-trunc.txt" # labeled training corpus
TEST="data/en/wsj22-trunc.txt" # labeled test corpus
NOTAGS="data/en/wsj2-21-notags.txt" # unlabeled training corpus
OUT="data/output.txt" # tagged output file (temporary)
EXTRA="data/tmp.txt" # corpus of determined length (temporary)
iter=$em_iter
lang="EN"
# if a 4th arg is passed
if [ $# -eq "4" ]; then
    if [[ "${4}" == "SANS" ]]; then # check if we should be in Sanskrit mode
        TRAIN="data/sans/train.txt"
        TEST="data/sans/test.txt"
        NOTAGS="data/sans/GRETILNoTagsTrain.txt"
        lang="SANS"
    elif [[ "${4}" != "EN" ]]; then
        echo "$4 is not a valid language option."
        exit 1
    fi
fi
tsize=( $( wc -l ${TRAIN}) ) # get size of training data
testsize=( $( wc -l ${TEST}) ) # get size of test data
len=( $( wc -l ${NOTAGS}) ) # make into array, use only first word (the length)
# make sure debug directory exists, create it if not
if [ ! -d "${OUTPUT_DIR}" ]; then
    mkdir "${OUTPUT_DIR}"
fi
# remove old output file (check for confirm):
if [ -e ${output} ]; then
    rm -i ${output}
    if [ $? -ne "0" ]; then
        exit 1 # exit on error
    fi
fi
# remove old tmp file
if [ -e ${EXTRA} ]; then
    rm -f ${EXTRA}
fi
trap "kill -9 $$" SIGINT SIGTERM # catch these signals to kill tagger and this script
echo "Running ${lang} tagger based on:"
printf "\t• ${tsize[0]} lines labeled training data, ${testsize[0]} lines of test data.\n"
printf "\t• Up to ${len[0]} lines unlabeled training data, incrementing by ${step_size} lines.\n"
printf "\t• ${iter} iterations of EM.\n"
printf "\nSupervised baseline accuracy : "
./tagger.py --lang ${lang} --model super --train ${TRAIN} --test ${TEST} --output ${OUT} >"${OUTPUT_DIR}/supervised_stdout.txt" 2>"${OUTPUT_DIR}/supervised_stderr.txt"
printf "$( ./score.py --lang ${lang} ${TEST} ${OUT} )\n"
printf "\nSemi-supervised accuracy:\n"
# i is the size of the data/tmp.txt file (in no. lines)
for i in $( seq 0 ${step_size} ${len[0]} ); do
    printf " ${i} unlabeled lines: "
    if [ $i -eq 0 ]; then
        touch ${EXTRA}
    else
        head -n $i ${NOTAGS} > ${EXTRA}
    fi
    ./tagger.py --lang ${lang} --model semisuper --iter $iter --train ${TRAIN} --test ${TEST} --output ${OUT} --extra ${EXTRA} >"${OUTPUT_DIR}/semisupervised_stdout.txt" 2>"${OUTPUT_DIR}/semisupervised_stderr.txt"
    score=$( ./score.py --lang ${lang} ${TEST} ${OUT} )
    printf "${score}\n"
    # log to file:
    echo "${i} ${score}" >> ${output}
done
# cleanup:
rm -f ${OUT} ${EXTRA}
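
# A quick way to inspect the resulting OUTPUT table (one "<unlabeled lines> <score>"
# row per run) is, for example:
#   column -t en_scores.txt
# where en_scores.txt stands in for whatever OUTPUT path was passed above.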