forked from zbowang/BeamerTheme
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.tex
257 lines (213 loc) · 11.9 KB
/
main.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
\documentclass{beamer}
\author{Yu Cong}
\title[template example]{Minimizing the Sum of Piecewise Linear Convex Functions}
\date{\today}
% \AtBeginSection[]{
% \frame{\frametitle{Outline}\tableofcontents[currentsection,
% subsectionstyle=show/show/shaded]}
% }
\usetheme{Simple}
\usepackage{algo}
% \useoutertheme{tree}
\begin{document}
\begin{frame}[plain]
% Print the title page as the first slide
\titlepage
\end{frame}
\begin{frame}[plain]{Plan}
\tableofcontents
\end{frame}
\section{Problems \& Definitions}
\begin{frame}{$\min \sum f_i(a_i\cdot x-b_i)$}
\begin{problem}
Given $n$ piecewise linear convex functions $f_1,...,f_n:\R \to \R$ of total $m$ breakpoints, and $n$ linear functions $a_i\cdot x-b_i:\R^d\to \R$, find $\min_x \sum_i f_i(a_i\cdot x-b_i)$.
\end{problem}
\begin{figure}
\centering
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.4\linewidth]{images/Piecewise_linear_function.svg.png}
\caption{A 1D pwl function with 4 line segments and 3 breakpoints}
\label{fig:sub1}
\end{subfigure}%
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=.4\linewidth]{images/Piecewise_linear_function2D.png}
\caption{A 2D pwl concave function}
\label{fig:sub2}
\end{subfigure}
% \caption{A figure with two subfigures}
\label{fig:1}
\end{figure}
$f_i(a_i\cdot x-b_i):\R^d\to \R$ is also piecewise linear convex.
\end{frame}
\begin{frame}{General piecewise linear convex function in $\R^d$}
\begin{definition}[piecewise linear convex function in $\R^d$]\label{def:pwlc}
\[
g(x)=\max \{a_1^Tx+b_1,\ldots,a_L^Tx+b_L\}
\]
\end{definition}
Every piecewise linear convex function in $\R^d$ can be expressed in this form.\footnote{S.P. Boyd, L. Vandenberghe, \textbf{Convex optimization}, Cambridge University Press, Cambridge, UK ; New York, 2004.}
However, observe that in our problem the piecewise linear convex function is not that general. It is a composition of a linear mapping and an 1D piecewise linear convex function.
\end{frame}
\begin{frame}{$f\circ l\not \equiv g$}
\begin{proof}
\small
Consider a piecewise linear convex function $g:\R^2\to \R$. $g$ can be viewed as the maximum of a set of planes in $\R^3$.
Consider a series of points $P=\set{p_1,p_2,...,p_k}$ on the 2D plane. After applying the linear mapping to $P$, we will get a sequence of numbers(points in 1D) $P'=\set{p_1',p_2',...,p_k'}$. We assume that $P'$ is non-decreasing. Note that the value of $g$ on $P'$ is always unimodal since $g$ is convex. However, the value of $f$ on $P$ may not be unimodal. Thus the composition of a linear mapping and a pwl convex function in 1D is not equivalent to pwl convex functions in high dimensions.
\end{proof}
\end{frame}
\section{Properties}
\begin{frame}[allowframebreaks]{A linear time algorithm}
\begin{problem}
Given $n$ piecewise linear convex functions $f_1,...,f_n:\R \to \R$ of total $m$ breakpoints, and $n$ linear functions $a_i\cdot x-b_i:\R^d\to \R$, find $\min_x \sum_i f_i(a_i\cdot x-b_i)$.
\end{problem}
This can be solve in $O(2^{2^d} (m+n))$ through Megiddo's Low dimension LP algorithm.\footnote{Nimrod Megiddo. Linear programming in linear time when the dimension is fixed. J. ACM, 31(1):114–127, jan 1984.}
Let $n_i$ be the number of line segments in $f_i$. Note that $\sum_i n_i=m+n$.
We can formulate the optimization problem as the following linear program,
\newpage
\begin{align*}
\min &\sum_{i=1}^n f_i\\
s.t. \quad f_i&\geq \alpha_j(a_i\cdot x -b_i)-\beta_j \quad \forall i\in[n], \forall j\\
\end{align*}
where $\alpha_j x - \beta_j$ is the $j$'th line segment on $f_i$.
There will be $m+n$ constraints in total.
\end{frame}
\section{LP in Low Dimensions}
\begin{frame}[allowframebreaks]{Megiddo's algorithm}%
\mybox[oliver!20]{
\tiny
\url{https://people.inf.ethz.ch/gaertner/subdir/texts/own_work/chap50-fin.pdf}
}
The dimension $d$ (in our problem, the dimension of $x$) is small while the number of constraints are huge. We need only $d$ linearly independent tight constraints to identify the optimal solution $x^*$.
Thus most of the constraints are useless.
\textbf{For one constraint, how can we know where does $x^*$ locate with respect to it?}
Through inquiries. Let $a\cdot x \leq b$ be the constraint. Define 3 hyperplanes, $a\cdot x = c$ where $c\in \set{b,b-\e,b+\e}$. Now solve three $d-1$ dimension linear programming. The largest of the three objective functions tells us where $x^*$ lies with respect to the
hyperplane.
\newpage
Finding the optimal solution $x^*$ is therefore equivalent to the following problem,
\begin{problem}[Multidimensional Search Problem]
Suppose that there exists a point $x^*$ which is not known to us, but there is a oracle that can tell the position of $x^*$ relative to any hyperplane in $\R^d$. Given $n$ hyperplanes, we want to know the position of $x^*$ relative to each of them.
\end{problem}
\textbf{What about 1 dimension search?} A fastest way will be using the linear time median algorithm. We can find the median of $n$ numbers and call the oracle to compare the median with $x^*$. Thus with $O(n)$ time median finding and one oracle call, we find the relative position of $n/2$ elements relative to $x^*$.
\newpage
If we can do similar things in $\R^d$, i.e., there is a method which makes $A(d)$ oracle calls and determines at least $B(d)$ fraction of relative positions, then we can apply this method $\log_{\frac{1}{1-B(d)}} n$ times to find all relative positions.
Note that in 1 dimension, $A(1)=1$ and $B(1)=1/2$ (call oracle to compare $x^*$ and the median). In $\R^d$, our oracle is the recursive inquiry.
A trivial method will be iterating on all hyperplanes and calling the oracle on each one, since there is no \emph{median} of a set of hyperplanes in $\R^d$. The complexity recurrence is
\[T(n,d)=n(3T(n-1,d-1)+O(nd))\]
Note that in this setting $A(d)=1$ and $B(d)=1/n$.
\newpage
Megiddo designed a clever method where $A(d)=2^{d-1}$ and $B(d)=2^{-(2^d-1)}$.
\begin{lemma}
\begin{figure}
\includegraphics[width=.3\textwidth]{images/1234.pdf}
\end{figure}
Given two lines through the origin with slopes of opposite sign, knowing which quadrant $x^*$ lies in allows us to locate it with respect to at least one of the lines.
\end{lemma}
\newpage
Let $l_H$ be the intersection of hyperplane $H$ and $x_1x_2$ plane.
Compute a partition $S_1\sqcup S_2=\mathcal H$.
$H\in S_1$ iff $l_H$ has positive slope. Otherwise $l_H\in S_2$. We further assume that $|S_1|=|S_2|=n/2$.
\noindent
\begin{minipage}[t]{.5\textwidth}
\begin{figure}
\includegraphics[width=.85\textwidth]{images/4l1234.pdf}
\end{figure}
\end{minipage}% <---------------- Note the use of "%"
\begin{minipage}[t]{.5\textwidth}
\vspace{25pt}
Now we have $n/2$ pairs $(H_1,H_2)$, where $H_i\in S_i$. Let $l_i$ be the intersection of $H_i$ and $x_1x_2$ plane.
Let $H_{x_i}$ be the linear combination of $H_1$ and $H_2$ s.t. $x_i$ is eliminated.
\end{minipage}
{
% Now we have $n/2$ pairs $(H_1,H_2)$, where $H_i\in S_i$. Let $l_i$ be the intersection of $H_i$ and $x_1x_2$ plane.
% Let $H_{x_i}$ be the linear combination of $H_1$ and $H_2$ s.t. $x_i$ is eliminated.
By the previous lemma, calling oracle on $l_{x_1}$ and $l_{x_2}$ locate $x^*$ with respect to at least one of $H_1$ and $H_2$.}
\newpage
Input: $S_1,S_2$ and the pairs.
\begin{enumerate}
\item recursively locate $x^*$ respect to $B(d-1)n/2$ hyperplanes($H_{x_i}$) with $A(d-1)$ oracle calls in $S_1$.
\item locate with respect to a $B(d-1)$-fraction of corresponding paired hyperplanes in $S_2$.
\item There are still $(1-{B(d-1)}^2)/2$-fraction of hyperplanes for which we do not know the relative position with $x^*$. Run this algorithm on these hyperplanes.
\end{enumerate}
This gives the recurrence
\[
T(n,d)\leq 3\cdot 2^{d-1}T(n,d-1)+T((1-2^{1-2^d})n,d)+O(nd)
\]
with solution $T(n,d)=O(2^{2^d}n)$.
\end{frame}
\begin{frame}{Zemel's conversion}
\begin{align*}
\min &\sum_{i=1}^n f_i\\
s.t. \quad f_i&\geq \alpha_j(a_i\cdot x -b_i)-\beta_j \quad \forall i\in[n], \forall j\\
\end{align*}
Our linear program has \emph{dimension} $n+d$.
\textbf{\href{https://www.sciencedirect.com/science/article/abs/pii/0020019084900140}{Zemel}} showed that this kind of problem can be solved in linear time.
\mybox[oliver!20]{
This is a \emph{$d$-dimensional search problem} with $n+d$ hyperplanes.
}
\end{frame}
\section{Possible Improvements}
\begin{frame}{Other algorithms for fixed dimension LP}
\begin{figure}
\centering
\includegraphics[width=\textwidth]{images/table.png}
\caption{Algorithms for LP in low dimensions \footnote{table stolen from \url{https://dl.acm.org/doi/10.1145/3155312}}}
\end{figure}
\textbf{Can we use faster fixed dimension LP algorithms to get better complexity?}
\end{frame}
\begin{frame}[allowframebreaks]{LP-type problem}
Algorithms for low dim LP are actually solving a more abstract problem.
\begin{definition}[LP-type problem]
Given a set $S$ and a function $f:S\to \R$. $f$ satisfies two properties:%
\begin{itemize}
\item Monotonicity: $\forall A\subseteq B\subseteq S, f(A)\leq f(B)\leq f(S)$.
\item Locality: $\forall A\subseteq B\subseteq S$ and $\forall x\in S$, if $f(A) = f(B) = f(A \cup \{x\})$, then $f(A) = f(B \cup \{x\})$.
\end{itemize}
\end{definition}
Linear programs(minimization) are LP-type problems.
$B\subseteq S$ is a basis if $\forall B'\subsetneq B, f(B')<f(B)$. A set of `useful' constraints in a linear program is a basis.
The combinatorial dimension is the size of the largest basis.
If a LP problem has low dimension, then its combinatorial dimension is low. \textbf{What about the converse?}
\newpage
\begin{align*}
\min &\sum_{i=1}^n f_i\\
s.t. \quad f_i&\geq \alpha_j(a_i\cdot x -b_i)-\beta_j \quad \forall i\in[n], \forall j\\
&\cdots
\end{align*}%
\textbf{Does our LP has low combinatorial dimension?}
No. A basis contains at least $n$ constraints since otherwise some $f_i$ is unbounded.
\begin{problem}
Is it possible to formulate the pwl convex minimization problem as an LP-type problem with low combinatorial dimension?
\end{problem}
\end{frame}
\begin{frame}{Aggregate the pwl convex functions}
% blog posts
The sum of pwl convex functions are still pwl convex.
\newline If we can compute $F=\sum f_i$ in $O(m)$ and the number of line segments on $F$ is also $O(m)$, then the corresponding LP will have low combinatorial dimension.
\begin{align*}
\min \quad F\\
s.t. \quad F&\geq \alpha_j\cdot x -\beta_j \quad \forall j\\
&\cdots
\end{align*}
However, this is not possible for general pwl convex functions in $\R^d$.\footnote{see this \href{https://talldoor.uk/posts/2024-09-16-piecewise-linear.html}{blog post} for detail.}
\end{frame}
\begin{frame}{pseudocode}
\begin{figure}[h!]
\begin{algo}
sort vertices in $G$ in such that $\deg(v_1)\geq \dots \geq \deg(v_n)$\\
for $i\in[n]$:\\
\quad for each vertex $u\in N(v_i)$:\\
\quad \quad let $U[v]=\emptyset$ for all $v$.\\
\quad \quad for each vertex $w\in N(u)$ that is not $v_i$:\\
\quad \quad \quad add $u$ to $U[w]$.\\
\quad for all vertex $w\in V$ that is not $v_i$:\\
\quad \quad if $|U[w]|\geq \ell$:\\ \quad
\quad \quad \textbf{output} tuple $(v_i,w,U[w])$\\
\quad $G=G-v_i$
\end{algo}
\caption{An $O(m\alpha(G))$ algorithm for finding all colored $K_{2,\ell'}$ for $\ell' \geq \ell$}
\label{figalg:malpha}
\end{figure}
\end{frame}
\end{document}