\documentclass[11pt]{article} \parindent 0in \usepackage{latexsym} \usepackage{amsmath} \usepackage{amssymb} %\usepackage{amsthm} %\usepackage{epsfig} \newcommand{\handout}[5]{ \noindent \begin{center} \framebox{ \vbox{ \hbox to 5.78in { {\bf 18.409 The Behavior of Algorithms in Practice} \hfill #2 } \vspace{4mm} \hbox to 5.78in { {\Large \hfill #5 \hfill} } \vspace{2mm} \hbox to 5.78in { {\it #3 \hfill #4} } } } \end{center} \vspace*{4mm} } \newcommand{\lecture}[4]{\handout{#1}{#2}{Lecturer: #3}{Scribe: #4}{Lecture #1}} \newcommand{\Kappa}{\kappa} \newcommand{\norm}[1]{\|#1\|} \newcommand{\infnorm}[1]{\norm{#1}_\infty} \newtheorem{theorem}{Theorem} \newtheorem{corollary}[theorem]{Corollary} \newtheorem{lemma}[theorem]{Lemma} \newtheorem{observation}[theorem]{Observation} \newtheorem{proposition}[theorem]{Proposition} \newtheorem{definition}[theorem]{Definition} \newtheorem{claim}[theorem]{Claim} \newtheorem{fact}[theorem]{Fact} \newtheorem{assumption}[theorem]{Assumption} \newcommand{\qed}{\ensuremath{\blacksquare}} \newenvironment{proof}{\noindent{\bf Proof}\hspace*{1em}}{\qed\bigskip} \setlength{\topmargin}{-0.50in} \setlength{\textheight}{8.5in} \setlength{\oddsidemargin}{0.0in} \setlength{\evensidemargin}{-0.0in} \setlength{\textwidth}{6in} \addtolength{\parskip}{2pt} \addtolength{\itemsep}{0.1in} \parskip 1.5ex \renewcommand{\baselinestretch}{1.25} \begin{document} \lecture{4}{2/21/2002}{Dan Spielman}{Matthew Lepinski} \section*{A Gaussian Elimination Example} To solve: $$ \left[\begin{array}{cc} \epsilon & 1 \\ 1 & 1 \end{array}\right] \left[\begin{array}{c} x_1 \\ x_2 \end{array}\right] = \left[\begin{array}{c} 1 \\ 1 \end{array}\right] $$ First factor the matrix to get: $$ \left[\begin{array}{cc} 1 & 0 \\ \frac{1}{\epsilon} & 1 \end{array}\right] \left[\begin{array}{cc} \epsilon & 1 \\ 0 & 1 - \frac{1}{\epsilon} \end{array}\right] \left[\begin{array}{c} x_1 \\ x_2 \end{array}\right] = \left[\begin{array}{c} 1 \\ 1 \end{array}\right] $$ Next solve: $$ \left[\begin{array}{cc} 1 & 0 \\ \frac{1}{\epsilon} & 1 \end{array}\right] \left[\begin{array}{c} y_1 \\ y_2 \end{array}\right] = \left[\begin{array}{c} 1 \\ 1 \end{array}\right] $$ To get: \begin{eqnarray*} y_1 &=& 1 \\ y_2 &=& 1-\frac{1}{\epsilon} \end{eqnarray*} Finally solve: $$ \left[\begin{array}{cc} \epsilon & 1 \\ 0 & 1 - \frac{1}{\epsilon} \end{array}\right] \left[\begin{array}{c} x_1 \\ x_2 \end{array}\right] = \left[\begin{array}{c} y_1 \\ y_2 \end{array}\right] $$ To get: \begin{eqnarray*} x_1 &=& 0 \\ x_2 &=& 1 \end{eqnarray*} Which is the solution to the original system. When viewed this way, Gaussian elimination is just LU factorization of a matrix followed by some simple substitutions. \section*{Floating Point} We consider a model of floating point computation where it is possible to represent numbers of the form: $$ \frac{m}{2^t}2^b $$ Where $m$ is an integer such that $-2^t \leq m \leq 2^t$. In this model, $t$ is the precision of the floating point representation. In what follows, we will ignore bounds on $b$. Let $\epsilon_{mach}$ be defined so that $1 + \epsilon_{mach}$ is the smallest number greater than $1$ which the machine can represent. The goal of a floating point computation is to provide an answer which is correct to within a factor of $(1 \pm \epsilon_{mach})$. Consider the example in the previous section where $\epsilon < \epsilon_{mach}$. In this case, we will compute the LU factorization as: $$ \left[\begin{array}{cc} 1 & 0 \\ \frac{1}{\epsilon} & 1 \end{array}\right] \left[\begin{array}{cc} \epsilon & 1 \\ 0 & - \frac{1}{\epsilon} \end{array}\right] $$ This means that using Gaussian Elimination (with no pivoting) we will actually be solving the system: $$ \left[\begin{array}{cc} \epsilon & 1 \\ 1 & 0 \end{array}\right] \left[\begin{array}{c} x_1 \\ x_2 \end{array}\right] = \left[\begin{array}{c} 1 \\ 1 \end{array}\right] $$ And so will get the solution: \begin{eqnarray*} x_1 &=& 1 \\ x_2 &=& 1 - \epsilon \end{eqnarray*} Which is nowhere near the correct solution to the original system. \textbf{Note:} The matrix in the previous example is well-conditioned, having a condition number of about 2.68, but we still fail miserably when doing Gaussian Elimination on this matrix. \textbf{Exercise:} Do the same thing for the system: $$ \left[\begin{array}{cc} 1 & 1 \\ \epsilon & 1 \end{array}\right] \left[\begin{array}{c} x_2 \\ x_1 \end{array}\right] = \left[\begin{array}{c} 1 \\ 1 \end{array}\right] $$ You should observe that permuting the rows/columns of the matrix (pivoting) allows you to solve the system with Gaussian Elimination even when $\epsilon < \epsilon_{mach}$. \begin{theorem}[Wilkinson] If you solve $Ax = b$ computing $\hat{L}$, $\hat{U}$ and $\hat{x}$, then there exists a $\delta A$ such that $$ (A + \delta A)\hat{x} = b $$ and $$ \frac{\infnorm{\delta A}}{\infnorm{A}} \leq n \epsilon_{mach} \left(3 + \frac{5 \infnorm{\hat{L}} \infnorm{\hat{U}}}{\infnorm{A}}\right) $$ \end{theorem} The problem with the previous example is that although $A$ had small entries, $U$ had a very large entry. When doing Gaussian Elimination, we say that the growth factor is: $$ \frac{\infnorm{U}}{\infnorm{A}} $$ \section*{Partial Pivoting} \textbf{Idea:} Permute the rows but not the columns such that the pivot is the largest entry in its column. \textbf{Note:} This is the technique used by Matlab. At step $j$ in the Gaussian Elimination, permute the rows so that $|a_{j,j}| \geq |a_{i,j}|$ for all $i > j$. This guarantees that $\infnorm{L} \leq 1$. However, in the worst case, partial pivoting yields a growth factor of $2^{n-1}$ for an $n$-by-$n$ matrix. \section*{Complete Pivoting} \textbf{Idea:} Permute the rows and the columns such that the pivot is the largest entry in the matrix. Wilkinson proved that Complete Pivoting guarantees that: $$ \frac{\infnorm{U}}{\infnorm{A}} \leq n^{\frac{1}{2}\log(n)} $$ However, it is conjectured that the growth factor can be upper bounded by something closer to $n$. Unfortunately, using complete pivoting requires about twice as many floating point operations as partial pivoting. Therefore, since partial pivoting works well in practice, complete pivoting is hardly ever used. \section*{Sometimes You Don't Need to Pivot} \begin{enumerate} \item If $A$ is diagonally dominant then it is possible to bound the size of the entries in $L$. \item If $A$ is positive definite then it is possible to bound the size of the entries in $U$. \end{enumerate} Having both of these conditions is very nice. In practice, both of these conditions show up quite often. \begin{definition} A matrix, $A$, is (column-wise) diagonally dominant if for all $j$, $$ |a_{j,j}| \geq \sum_{i \neq j} a_{i,j} $$ \end{definition} \begin{theorem} If $A$ is (column-wise) diagonally dominant, then $l_{i,j} \leq 1$. Equivalently, if $A$ is diagonally dominant then one does not permute when using partial pivoting. \end{theorem} \begin{proof} After the $k^{th}$ round of Gaussian Elimination, we refer to the $n-k$ by $n-k$ matrix in the lower left corner as $A^{(k)}$. If suffices to prove that all of the $A^{(k)}$ are diagonally dominant. We will show that $A^{(1)}$ is diagonally dominant. A straightforward inductive argument can be used to show all of the $A^{(k)}$ are diagonally dominant. \begin{claim} $A^{(1)}$ is diagonally dominant. \end{claim} Let $$ A = \left[\begin{array}{cc} \alpha & w \\ v & B \end{array}\right] $$ Then one step of Gaussian Elimination yields $$ A = \left[\begin{array}{cc} 1 & 0 \\ \frac{v}{\alpha} & I \end{array}\right] \left[\begin{array}{cc} \alpha & w \\ 0 & B - \frac{vw}{\alpha} \end{array}\right] $$ Therefore, $A^{(1)} = B - \frac{vw}{\alpha}$. Let $a^{(1)}_{i,j}$ be the $i,j$ entry of $A^{(1)}$. It suffices to show that $$ |a^{(1)}_{j,j} \geq \sum_{i \geq 2, i \neq j} |a^{(1)}_{i,j}| $$ We know that \begin{equation*} \sum_{i \geq 2, i \neq j} |a^{(1)}_{i,j}| = \sum_{i \geq 2, i \neq j} |b_{i,j} - \frac{v_i w_j}{\alpha}| \leq \sum_{i \geq 2, i \neq j} |b_{i,j}| + \frac{|w_j|}{\alpha} \sum_{i \geq 2, i \neq j} |v_i| \end{equation*} Since $A$ is diagonally dominant, it follows that \begin{eqnarray*} \sum_{i \geq 2, i \neq j} |a^{(1)}_{i,j}| &\leq& (|b_{j,j}| - |w_j|) + \frac{|w_j|}{\alpha}(|\alpha| - |v_j|) = |b_{j,j}| - \frac{|w_j|}{|\alpha|}|v_j| \\ &\leq& \left| b_{j,j} - \frac{w_j v_j}{\alpha} \right| = |a^{(1)}_{j,j} \end{eqnarray*} \end{proof} \begin{definition} $A$ is positive definite if $A$ is symmetric and for all $x$, $xAx^{T} > 0$ \end{definition} \textbf{Exercise:} The above definition is equivalent to the following: \begin{enumerate} \item All eigenvalues of $A$ are positive. \item All principal minors of $A$ are positive definite. \end{enumerate} \textbf{Exercise:} Eigenvalues of $A_{2..n, 2..n}$ interlace the eigenvalues of $A$. Additionally, the following two facts are implied by Item \#2 above: \begin{itemize} \item Diagonal entries of $A$ are positive. \item The entry with the largest absolute value lies on a diagonal. \end{itemize} \begin{theorem} If $A$ is positive definite, then $\infnorm{A^{(k)}} \leq \infnorm{A}$. \end{theorem} \textbf{Note:} This implies that $\infnorm{U} \leq \infnorm{A}$. \begin{proof} We first prove that the $A^{(k)}$ are positive definite. We will show that $A^{(1)}$ is positive definite. A straightforward inductive argument can be used to show all of the $A^{(k)}$ are diagonally dominant. It is easy to see that $A^{(1)}$ is symmetric and so it suffices to show that for all $x$, $xAx^{T} > 0$. Let $$ A = \left[\begin{array}{cc} \alpha & v^{T} \\ v & B \end{array}\right] $$ and recall that $$ A^{(1)} = B - \frac{v v^{T}}{\alpha} $$ Therefore, \begin{multline*} xAx^{T} - (x_2 \ldots x_n) A^{(1)}(x_2 \ldots x_n)^{T} = \\ \alpha x_1^2 + 2x_1 \sum_{i\geq 2} v_i x_i + \sum_{i \geq 2, j\geq 2} b_{i,j}x_i x_j - \sum_{i \geq 2, j\geq 2}(b_{i,j}-\frac{v_i v_j}{\alpha})x_i x_j \end{multline*} Therefore, by cancellation, $$ xAx^{T} - (x_2 \ldots x_n) A^{(1)}(x_2 \ldots x_n)^{T} = \alpha \left(x_i + \sum_{i\geq 2} \frac{v_i x_i}{\alpha}\right)^2 $$ This means that for any $x_2 \ldots x_n$, setting $$ x_1 = - \sum_{i\geq 2} \frac{v_i x_i}{\alpha} $$ yields $xAx^{T} = (x_2 \ldots x_n) A^{(1)}(x_2 \ldots x_n)^{T}$. Therefore, if $A^{(1)}$ is not positive definite, then neither is $A$. Now all that remains to be shown is that $\infnorm{A^{(1)}} \leq \infnorm{A}$. This will follow from two facts that were previously observed about positive definite matrices. (We repeat them here for convenience. \begin{itemize} \item Diagonal entries of $A$ are positive. \item The entry with the largest absolute value lies on a diagonal. \end{itemize} Therefore, we know that the largest entry of $A^{(1)}$ is $a^{(1)}_{j,j}$ for some $j \geq 2$. $$ 0 < a^{(1)}_{j,j} = b_{j,j} - \frac{v_j^2}{\alpha} \leq b_{j,j} = a_{j,j} $$ \end{proof} \section*{A Smoothed Analysis Theorem} \begin{theorem} Let $\bar{A}$ be any matrix with $\infnorm{\bar{A}} \leq 1$ and let $A = \bar{A} + G$ where $G$ is a Gaussian random matrix with variance $\sigma^2$. Then \begin{equation} Prob[\infnorm{U} > 4n^{\frac{7}{2}}\sqrt{\log(n)}/\epsilon] < \frac{\epsilon}{\sigma} \end{equation} \begin{equation} Prob[\infnorm{L} > 4n^{\frac{7}{2}}\sqrt{\log(n)}/\epsilon] < \frac{\epsilon \log(\frac{1}{\epsilon})}{\sigma} \end{equation} where $A = LU$. \end{theorem} We will prove this theorem during the next lecture. \end{document}