-
Notifications
You must be signed in to change notification settings - Fork 12
/
Classification.tex
27 lines (23 loc) · 1.01 KB
/
Classification.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
\section*{Classification}
Solve $w^* = \underset{w}{\operatorname{argmin}} ~ \sum_{i} l(w;x_i,y_i)$; loss function $l$
\subsection*{0/1 loss}
$l_{0/1} (w;x_i,y_i) = 1 \text{ if } y_i \neq \operatorname{sign}(w^Tx_i) \text{ else } 0$
% Non-convex: use for evaluation, but need surrogate loss for training
\subsection*{Perceptron algorithm}
Use $l_P (w;x_i,y_i) = \operatorname{max}(0, -y_i w^T x_i)$ and SGD\\
$\nabla_w l_P(w;x_i,y_i) =
\begin{cases}
0 &\text{if } y_i w^T x_i \geq 0\\
-y_i x_i &\text{otherwise}
\end{cases}$ \\
Data lin. separable $\Leftrightarrow$ obtains a lin. separator (not necessarily optimal)
\subsection*{Support Vector Machine (SVM)}
Hinge loss: $l_H(w;x_i,y_i) = \operatorname{max}(0,1-y_i w^T x_i)$ \\
$\nabla_w l_H(w;x_i,y_i) =
\begin{cases}
0 &\text{if } y_i w^T x_i \geq 1\\
-y_i x_i &\text{otherwise}
\end{cases}$\\
$w^* = \underset{w}{\operatorname{argmin}} ~ \sum_{i} l_H(w;x_i,y_i) + \lambda\lVert w\rVert_2^2$
% regularisation needed to account for arbitrary choice of 1
% could obviously be any regulariser