% main.tex

\documentclass{beamer}

\author{Yu Cong}
\title[template example]{Minimizing the Sum of Piecewise Linear Convex Functions}
\date{\today}

% \AtBeginSection[]{
%     \frame{\frametitle{Outline}\tableofcontents[currentsection, 
%     subsectionstyle=show/show/shaded]}
% }
    
\usetheme{Simple}
\usepackage{algo}
% \useoutertheme{tree}
    
\begin{document}
\begin{frame}[plain]
    % Print the title page as the first slide
    \titlepage
\end{frame}

\begin{frame}[plain]{Plan}
    \tableofcontents
\end{frame}

\section{Problems \& Definitions}
\begin{frame}{$\min \sum f_i(a_i\cdot x-b_i)$}
\begin{problem}
    Given $n$ piecewise linear convex functions $f_1,\ldots,f_n:\R \to \R$ with $m$ breakpoints in total, and $n$ linear functions $a_i\cdot x-b_i:\R^d\to \R$, find $\min_x \sum_i f_i(a_i\cdot x-b_i)$.
\end{problem}
\begin{figure}
    \centering
    \begin{subfigure}{.5\textwidth}
      \centering
      \includegraphics[width=.4\linewidth]{images/Piecewise_linear_function.svg.png}
      \caption{A 1D pwl function with 4 line segments and 3 breakpoints}
      \label{fig:sub1}
    \end{subfigure}%
    \begin{subfigure}{.5\textwidth}
      \centering
      \includegraphics[width=.4\linewidth]{images/Piecewise_linear_function2D.png}
      \caption{A 2D pwl concave function}
      \label{fig:sub2}
    \end{subfigure}
    % \caption{A figure with two subfigures}
    \label{fig:1}
    \end{figure}
    $f_i(a_i\cdot x-b_i):\R^d\to \R$ is also piecewise linear convex.
\end{frame}

\begin{frame}{General piecewise linear convex function in $\R^d$}
\begin{definition}[piecewise linear convex function in $\R^d$]\label{def:pwlc}
\[
g(x)=\max \{a_1^Tx+b_1,\ldots,a_L^Tx+b_L\}
\]
\end{definition}

Every piecewise linear convex function in $\R^d$ can be expressed in this form.\footnote{S.P. Boyd, L. Vandenberghe, \textbf{Convex optimization}, Cambridge University Press, Cambridge, UK ; New York, 2004.}

However, observe that in our problem the piecewise linear convex function is not that general. It is a composition of a linear mapping and a 1D piecewise linear convex function.

\end{frame}

\begin{frame}{$f\circ l\not \equiv g$}
    \begin{proof}
    \small
    Consider a piecewise linear convex function $g:\R^2\to \R$. $g$ can be viewed as the maximum of a set of planes in $\R^3$.

    Consider a sequence of points $P=\set{p_1,p_2,\ldots,p_k}$ on the 2D plane. After applying the linear mapping to $P$, we get a sequence of numbers (points in 1D) $P'=\set{p_1',p_2',\ldots,p_k'}$. We assume that $P'$ is non-decreasing. Note that the sequence of values of $f$ on $P'$ is always unimodal since $f$ is convex. However, the sequence of values of $g$ on $P$ may not be unimodal. Thus the composition of a linear mapping and a pwl convex function in 1D is not equivalent to a general pwl convex function in higher dimensions.
    \end{proof}
\end{frame}

\section{Properties}
\begin{frame}[allowframebreaks]{A linear time algorithm}
    \begin{problem}
        Given $n$ piecewise linear convex functions $f_1,\ldots,f_n:\R \to \R$ with $m$ breakpoints in total, and $n$ linear functions $a_i\cdot x-b_i:\R^d\to \R$, find $\min_x \sum_i f_i(a_i\cdot x-b_i)$.
    \end{problem}
    This can be solved in $O(2^{2^d} (m+n))$ time using Megiddo's low-dimensional LP algorithm.\footnote{Nimrod Megiddo. Linear programming in linear time when the dimension is fixed. J. ACM, 31(1):114--127, Jan 1984.}

    Let $n_i$ be the number of line segments in $f_i$. Note that $\sum_i n_i=m+n$.

    We can formulate the optimization problem as the following linear program,
    \newpage

    \begin{align*}
        \min &\sum_{i=1}^n f_i\\
        \text{s.t.} \quad f_i&\geq \alpha_j(a_i\cdot x -b_i)-\beta_j \quad \forall i\in[n], \forall j
    \end{align*}

    where $\alpha_j x - \beta_j$ is the $j$-th line segment of $f_i$.

    There will be $m+n$ constraints in total.
\end{frame}

\section{LP in Low Dimensions}
\begin{frame}[allowframebreaks]{Megiddo's algorithm}%
    \mybox[oliver!20]{
        \tiny
        \url{https://people.inf.ethz.ch/gaertner/subdir/texts/own_work/chap50-fin.pdf}
    }
    The dimension $d$ (in our problem, the dimension of $x$) is small while the number of constraints is huge. We need only $d$ linearly independent tight constraints to identify the optimal solution $x^*$.
    Thus most of the constraints are useless.


    \textbf{For one constraint, how can we know where $x^*$ lies with respect to it?}

    Through inquiries. Let $a\cdot x \leq b$ be the constraint. Define 3 hyperplanes, $a\cdot x = c$ where $c\in \set{b,b-\e,b+\e}$. Now solve three $(d-1)$-dimensional linear programs, one restricted to each hyperplane. The largest of the three objective values tells us where $x^*$ lies with respect to the
    hyperplane.

    \newpage
    Finding the optimal solution $x^*$ is therefore equivalent to the following problem,
    \begin{problem}[Multidimensional Search Problem]
        Suppose that there exists a point $x^*$ which is not known to us, but there is an oracle that can tell the position of $x^*$ relative to any hyperplane in $\R^d$. Given $n$ hyperplanes, we want to know the position of $x^*$ relative to each of them.
    \end{problem}

    \textbf{What about 1-dimensional search?} The fastest way is to use the linear time median algorithm. We can find the median of $n$ numbers and call the oracle to compare the median with $x^*$. Thus with $O(n)$ time median finding and one oracle call, we find the position of $n/2$ elements relative to $x^*$.

    \newpage

    If we can do similar things in $\R^d$, i.e., there is a method which makes $A(d)$ oracle calls and determines at least $B(d)$ fraction of relative positions, then we can apply this method $\log_{\frac{1}{1-B(d)}} n$ times to find all relative positions.

    Note that in 1 dimension, $A(1)=1$ and $B(1)=1/2$ (call oracle to compare $x^*$ and the median). In $\R^d$, our oracle is the recursive inquiry.

    A trivial method is to iterate over all hyperplanes and call the oracle on each one, since there is no \emph{median} of a set of hyperplanes in $\R^d$. The complexity recurrence is
    \[T(n,d)=n(3T(n-1,d-1)+O(nd))\] 
    Note that in this setting $A(d)=1$ and $B(d)=1/n$.

    \newpage
    Megiddo designed a clever method where $A(d)=2^{d-1}$ and $B(d)=2^{-(2^d-1)}$.

    \begin{lemma}
        \begin{figure}
            \includegraphics[width=.3\textwidth]{images/1234.pdf}
        \end{figure}
        Given two lines through the origin with slopes of opposite sign, knowing which quadrant $x^*$ lies in allows us to locate it with respect to at least one of the lines.
    \end{lemma}
    \newpage
    Let $l_H$ be the intersection of hyperplane $H$ and the $x_1x_2$-plane.
    Compute a partition $S_1\sqcup S_2=\mathcal H$.
    $H\in S_1$ iff $l_H$ has positive slope. Otherwise $H\in S_2$. We further assume that $|S_1|=|S_2|=n/2$.
    \noindent
    \begin{minipage}[t]{.5\textwidth}
    \begin{figure}
        \includegraphics[width=.85\textwidth]{images/4l1234.pdf}
    \end{figure}
    \end{minipage}% <---------------- Note the use of "%"
    \begin{minipage}[t]{.5\textwidth} 
        \vspace{25pt}
        Now we have $n/2$ pairs $(H_1,H_2)$, where $H_i\in S_i$. Let $l_i$ be the intersection of $H_i$ and the $x_1x_2$-plane.
        Let $H_{x_i}$ be the linear combination of $H_1$ and $H_2$ s.t. $x_i$ is eliminated.
    \end{minipage}

    { 
    % Now we have $n/2$ pairs $(H_1,H_2)$, where $H_i\in S_i$. Let $l_i$ be the intersection of $H_i$ and $x_1x_2$ plane.
    % Let $H_{x_i}$ be the linear combination of $H_1$ and $H_2$ s.t. $x_i$ is eliminated.
    By the previous lemma, calling the oracle on $H_{x_1}$ and $H_{x_2}$ locates $x^*$ with respect to at least one of $H_1$ and $H_2$.}
    \newpage
    Input: $S_1,S_2$ and the pairs.
    \begin{enumerate}
        \item recursively locate $x^*$ with respect to $B(d-1)n/2$ hyperplanes ($H_{x_i}$) with $A(d-1)$ oracle calls in $S_1$.
        \item locate with respect to a $B(d-1)$-fraction of corresponding paired hyperplanes in $S_2$.
        \item There is still a $(1-{B(d-1)}^2/2)$-fraction of hyperplanes for which we do not know the position relative to $x^*$. Run this algorithm on these hyperplanes.
    \end{enumerate}
    This gives the recurrence
    \[
        T(n,d)\leq 3\cdot 2^{d-1}T(n,d-1)+T((1-2^{1-2^d})n,d)+O(nd)
    \]
    with solution $T(n,d)=O(2^{2^d}n)$.
\end{frame}
\begin{frame}{Zemel's conversion}
    \begin{align*}
        \min &\sum_{i=1}^n f_i\\
        \text{s.t.} \quad f_i&\geq \alpha_j(a_i\cdot x -b_i)-\beta_j \quad \forall i\in[n], \forall j
    \end{align*}
    Our linear program has \emph{dimension} $n+d$. 
    \textbf{\href{https://www.sciencedirect.com/science/article/abs/pii/0020019084900140}{Zemel}} showed that this kind of problem can be solved in linear time.
    \mybox[oliver!20]{
        This is a \emph{$d$-dimensional search problem} with $n+d$ hyperplanes.
    }
\end{frame}

\section{Possible Improvements}

\begin{frame}{Other algorithms for fixed dimension LP}
    \begin{figure}
        \centering
        \includegraphics[width=\textwidth]{images/table.png}
        \caption{Algorithms for LP in low dimensions \footnote{table stolen from \url{https://dl.acm.org/doi/10.1145/3155312}}}
    \end{figure}
    \textbf{Can we use faster fixed dimension LP algorithms to get better complexity?}
\end{frame}

\begin{frame}[allowframebreaks]{LP-type problem}
    Algorithms for low dim LP are actually solving a more abstract problem.
    \begin{definition}[LP-type problem]
        An LP-type problem is a set $S$ together with a function $f:2^S\to \R$ satisfying two properties:%
        \begin{itemize}
            \item Monotonicity: $\forall A\subseteq B\subseteq S, f(A)\leq f(B)\leq f(S)$.
            \item Locality: $\forall A\subseteq B\subseteq S$ and $\forall x\in S$, if $f(A) = f(B) = f(A \cup \{x\})$, then $f(A) = f(B \cup \{x\})$.
        \end{itemize}
    \end{definition}
    Linear programs (minimization) are LP-type problems.

    $B\subseteq S$ is a basis if $\forall B'\subsetneq B, f(B')<f(B)$. A set of `useful' constraints in a linear program is a basis.

    The combinatorial dimension is the size of the largest basis.

    If an LP problem has low dimension, then its combinatorial dimension is low. \textbf{What about the converse?}
    \newpage
    \begin{align*}
        \min &\sum_{i=1}^n f_i\\
        \text{s.t.} \quad f_i&\geq \alpha_j(a_i\cdot x -b_i)-\beta_j \quad \forall i\in[n], \forall j\\
        &\cdots
    \end{align*}%
    \textbf{Does our LP have low combinatorial dimension?}
    
    No. A basis contains at least $n$ constraints since otherwise some $f_i$ is unbounded.

    \begin{problem}
        Is it possible to formulate the pwl convex minimization problem as an LP-type problem with low combinatorial dimension?
    \end{problem}
\end{frame}


\begin{frame}{Aggregate the pwl convex functions}
    % blog posts
    The sum of pwl convex functions is still pwl convex.
    \newline If we can compute $F=\sum f_i$ in $O(m)$ and the number of line segments on $F$ is also $O(m)$, then the corresponding LP will have low combinatorial dimension.
    \begin{align*}
        \min \quad F\\
        \text{s.t.} \quad F&\geq \alpha_j\cdot x -\beta_j \quad \forall j\\
        &\cdots
    \end{align*}
    However, this is not possible for general pwl convex functions in $\R^d$.\footnote{See this \href{https://talldoor.uk/posts/2024-09-16-piecewise-linear.html}{blog post} for details.}
\end{frame}
\begin{frame}{pseudocode}
    \begin{figure}[h!]
        \begin{algo}
            sort the vertices of $G$ such that $\deg(v_1)\geq \dots \geq \deg(v_n)$\\
            for $i\in[n]$:\\ 
            \quad for each vertex $u\in N(v_i)$:\\
            \quad \quad let $U[v]=\emptyset$ for all $v$.\\
            \quad \quad for each vertex $w\in N(u)$ that is not $v_i$:\\
            \quad \quad \quad add $u$ to $U[w]$.\\
            \quad for every vertex $w\in V$ that is not $v_i$:\\
            \quad \quad if $|U[w]|\geq \ell$:\\ \quad 
            \quad \quad \textbf{output} tuple $(v_i,w,U[w])$\\
            \quad $G=G-v_i$
        \end{algo}
        \caption{An $O(m\alpha(G))$ algorithm for finding all colored $K_{2,\ell'}$ for $\ell' \geq \ell$}
        \label{figalg:malpha}
    \end{figure}
\end{frame}

\end{document}