\documentclass[landscape]{seminar}
\slideframe{none}
\usepackage{epsf}
\usepackage{color}
\usepackage{amssymb,amsfonts}

% --- Dirac notation -------------------------------------------------------
% \ket{x} typesets | x > and \bra{x} typesets < x | with \left/\right
% auto-sized delimiters.  Both are math-mode only.
\newcommand{\ket}[1]{\left | \, #1 \right \rangle}
\newcommand{\bra}[1]{\left \langle #1 \, \right |}
% --- Polarization glyphs --------------------------------------------------
% Each glyph is wrapped in \mbox so it can be used in text or inside math
% (the slides use them as labels inside \ket, e.g. \ket{\vpols}).
% \vpols: up-down arrow; \hpols: left-right arrow.
\newcommand{\vpols}{\mbox{$\updownarrow$}}
\newcommand{\hpols}{\mbox{$\leftrightarrow$}}

% \ppols: double-headed diagonal arrow, built by overprinting \swarrow on
% \nearrow with \llap, set in \small with thin spaces on both sides.
\newcommand{\ppols}{\mbox{\small
\,\mbox{$\nearrow$}\llap{\mbox{$\swarrow$}}\,}}

% \qpols: the same construction for the other diagonal
% (\nwarrow overprinted on \searrow).
\newcommand{\qpols}{\mbox{\small
\,\mbox{$\searrow$}\llap{\mbox{$\nwarrow$}}\,}}

% \rpols / \lpols: composite glyphs made from a lowered triangle joined to
% \supset / \subset with negative thin spaces.
% NOTE(review): presumably symbols for the two circular polarizations --
% not used in the visible slides, confirm before relying on the meaning.
\newcommand{\rpols}{\mbox{\raisebox{-0.66ex}{$\triangleleft$}$\!\!\supset$}}
\newcommand{\lpols}{\mbox{$\subset\!\!$\raisebox{-0.66ex}{$\triangleright$}}}

% --- Coloured digits ------------------------------------------------------
% Red and blue 0/1 digits; the inner braces keep the \color change local.
% These need the color package loaded in the preamble.
\newcommand{\rz}{{\color{red}0}}
\newcommand{\ro}{{\color{red}1}}
\newcommand{\bz}{{\color{blue}0}}
\newcommand{\bo}{{\color{blue}1}}


\begin{document}

\begin{slide}
\begin{center}
{\large
Quantum Channels
}

Peter Shor\\
MIT\\
Cambridge, MA

\end{center}

\end{slide}

\begin{slide}
{\large
Claude Shannon, 1948}

\noindent
The fundamental problem of communication is that of
reproducing at one point either exactly or approximately a
message selected at another point.
\end{slide}

\begin{slide}
{\large John Pierce, 1973}

I think that I have never met a physicist who understood information
theory.  I wish that physicists would stop talking about reformulating
information theory and would give us a general expression for the
capacity of a channel with quantum effects taken into account rather
than a number of special cases.

\end{slide}

%\begin{slide}
%{\large John Wheeler, 1998}

%I think of my lifetime in physics as divided into three periods.  
%In the first period ... I was in the grip of the idea that 
%{\em Everything Is Particles} ...
%I call my second period {\em Everything Is Fields} ...  
%Now I am in the grip of a new vision, that {\em Everything Is Information.}
%The more I have pondered the mystery of the quantum and our strange 
%ability to comprehend this world in which we live, the more I see 
%possible fundamental roles for logic and information as the
%bedrock of physical theory.
%
%\end{slide}

\begin{slide}

{\large
Shannon
}

Shannon's 1948 paper ``A mathematical theory of communication'' founded
the field of information theory.  It contained two theorems that we will
discuss the quantum analogs of today: Source Coding and Channel Coding.

\noindent
{\bf Source Coding}\\
Asymptotically, $n$ symbols from a source $X$ can be compressed to 
length $n H(X) + O(\sqrt{n})$.

For a memoryless source, which emits i.i.d. signals where signal $x_i$
has probability $p_i$, the entropy $H$ is:
\[
H(X) = \sum_i -p_i \log p_i
\]

This lecture will deal only with memoryless sources and channels.
\end{slide}

\begin{slide}
\noindent
{\bf Channel Coding}\\
A noisy channel $N$ has capacity
\[
\max_{p(X)} \ \ I(X;N(X)),
\] where $p(X)$ is maximized over all probability distributions on
the channel input and $I(X;N(X))$ is the mutual information between
the input and the output. 

\begin{eqnarray*}
I(X;Y) &=& H(Y)- H(Y|X) \\ &=& H(X) + H(Y) - H(X,Y). 
\end{eqnarray*}

\[
H(X) = \sum_i -p_i \log p_i
\]

\end{slide}

\begin{slide}
Entropy of a quantum state

Classical Case\\
Given $n$ photons, each in state $\ket{\vpols}$ or $\ket{\hpols}$, with 
probability
$\frac{1}{2}$.  Any two of these states are completely distinguishable.
The entropy is $n$ bits.

Quantum Case\\
Given $n$ photons, each in state $\ket{\vpols}$ or $\ket{\ppols}$, with 
probability
$\frac{1}{2}$.  If the angle between the polarizations is small,
any two of these states are barely distinguishable.
Intuitively, the entropy should be much less than $n$ bits.

\end{slide}

\begin{slide}

By thermodynamic arguments, von Neumann deduced the entropy of
a quantum system with density matrix $\rho$ is
\[
H_\mathrm{vN}(\rho) = -\mathrm{Tr} ( \rho \log \rho )
\]

Recall $\rho$ was positive semidefinite, so $\rho \log \rho$ is
defined.

If $\rho$ is diagonal with eigenvalues $\lambda_i$, then
$\rho \log \rho$ is diagonal with eigenvalues $\lambda_i \log \lambda_i$.\\
Thus, $H_\mathrm{vN}(\rho) = H_\mathrm{Shan}(\lambda_i)$ so the 
von Neumann entropy is the Shannon entropy of the eigenvalues.\\
(Recall $\mathrm{Tr} \rho = 1 = \sum_i \lambda_i$.)

You can ask: is this the right definition for information theory?
\end{slide}

\begin{slide}
{\large Schumacher Compression}\\
(Quantum source coding theorem)

Given a memoryless source producing pure
states $v_1$, $v_2$, $v_3$, $\ldots$
with probabilities $p_1$, $p_2$, $p_3$, $\ldots$.\\

We want to send them to a receiver using as few qubits as possible.

Theorem (Schumacher, 1994):\\
You can send $n$ symbols using
\[
n H_\mathrm{vN}(\rho) + o(n)
\]
qubits, with fidelity approaching 1 as $n \rightarrow \infty$, where
$\rho = \sum_i p_i v_i v_i^\dag$ is the density matrix of the source.
\end{slide}

\begin{slide}
{\large Fidelity}

Classical source coding works with high probability: the probability
that the received sequence is exactly the signal goes to 1 as the block
length $n$ goes to $\infty$.  

This is too strong a criterion for quantum source coding.  We ask that
the {\em fidelity} between the signal sent and the received state $\rho$
goes to 1 as the block length $n$ goes to $\infty$.

The fidelity between a pure state sent $\ket{v}$ and a received density 
matrix $\rho$ is $\bra{v} \rho \ket{v} = v^\dag \rho v$.  

If the fidelity goes to 1, any measurement on the received signal $\rho$
will have almost the same probability distribution of outcomes as the
same measurement on $v v^\dag$, the state sent.  
\end{slide}

\begin{slide}
{\large
Proof of Classical Source Coding Theorem
}

Assume we have a source $X$ emitting symbols $s_1$, $s_2$, $\ldots$
with probabilities $p_1$, $p_2$, $\ldots$.  Consider a sequence of
$n$ symbols from this source.

Then a {\em typical sequence} has close to the right number ($n p_i$) of
each symbol $s_i$.

Theorem: Almost all the time, the source emits a typical sequence.  There
are $2^{n H_\mathrm{Shan}(X)+o(n)}$ typical sequences.

\end{slide}


\begin{slide}
Typical Subspaces

Have states $v_1$, $v_2$, $\ldots$, $v_k$ with probabilities $p_1$, 
$p_2$, $\ldots$, $p_k$.

Look at eigenvectors of density matrix $\rho$. \\
Assign to each eigenvector a probability equal to the
corresponding eigenvalue.\\
Any two eigenvectors are orthogonal.\\
Let the eigenvectors be $\hat{v}_1$, $\hat{v}_2$, $\ldots$, $\hat{v}_d$ 
with probabilities $\hat{p}_1$, 
$\hat{p}_2$, $\ldots$, $\hat{p}_d$.

Suppose we have $n$ of these states.\\
The {\em typical subspace} $S$ is the subspace generated by typical
sequences of eigenvectors.

$S$ has dimension $2^{H_{\mathrm{vN}}(\rho)n + o(n)}$. 
\end{slide}


\begin{slide}
How to do Schumacher compression.

Have states $v_1$, $v_2$, $\ldots$, $v_k$ with probabilities $p_1$, 
$p_2$, $\ldots$, $p_k$.  These give density matrix $\rho$.  Let
$S$ be the
typical subspace of $\rho^{\otimes n}$.

To compress:\\
Measure whether output of source lies in $S$.\\  
If {\em yes}, 
get the state projected onto $S$.  Can send using $\log \dim S \approx
n H_\mathrm{vN}(\rho)$ qubits.\\
If {\em no},
this is a low probability event; send anything.
\end{slide}

\begin{slide}
Why does Schumacher Compression work?

Recall that the density matrix determines the outcomes of any experiment.

Using the eigenvectors $\hat{v}_1$, $\hat{v}_2$, $\ldots$ $\hat{v}_d$ with
probabilities $\hat{p}_1$, $\hat{p}_2$, $\ldots$ $\hat{p}_d$ gives same 
probability of the outcomes as using states 
${v}_1$, ${v}_2$, $\ldots$ ${v}_k$ with
probabilities ${p}_1$, ${p}_2$, $\ldots$ ${p}_k$, since these two sources
have the same density matrix.

We know from the classical
theory of typical sequences that the probability of
a {\em no} outcome is very small using $\hat{v}_i$ and $\hat{p}_i$.  
Thus, the probability of a {\em no} outcome is also very
small with $v_i$ and $p_i$.

This implies that the original state is almost surely very close to
the typical subspace $S$.  Sending the state projected into $S$ gives
the right outcomes with high fidelity.
\end{slide}

\begin{slide}
{\large Accessible Information}

Suppose that we have a source that outputs signal $\rho_i$ with probability
$p_i$.  How much Shannon information can we extract about the sequence of
$i$'s?

Let $X$ be the random variable telling which signal $\rho_i$ was sent.

Optimize over all possible measurements $M$ on the signals (with outcomes
$M_1$, $M_2$, $\ldots$).

\[
I_{\mathrm{acc}} = \max_M I(X;M)
\]

\end{slide}

\begin{slide}
{\large Example 1:} Two states in ensemble

\begin{center}
$v_1 = $ \ \ \epsfxsize=.25in \epsfbox{vert.eps} \ \ \ 
$v_2 = $ \ \ \epsfxsize=.25in \epsfbox{diag.eps} \ \ \ 
\end{center}

{\tiny
\[
v_1 = \left(\begin{array}{c}1 \\ 0\end{array}\right)\quad
v_2 = \left(\begin{array}{c}\cos(\theta) \\ \sin(\theta)\end{array}\right)
\]
}
Then
\[
\rho = \frac{1}{2}\left(\begin{array}{cc}
1+\cos^2\theta & \sin\theta\cos\theta\\
\sin\theta\cos\theta & 1 - \cos^2\theta
\end{array}
\right)
\]
and $H_\mathrm{vN} = H(\frac{1}{2} + \frac{\cos\theta}{2})$.
The optimal measurement is

\epsfxsize=1in
\epsfbox{opt-meas.eps}
and $I_\mathrm{acc} = 1 - H(\frac{1}{2} + \frac{\sin\theta}{2})$.


\end{slide}

\begin{slide}

We see that $I_\mathrm{acc} < H_\mathrm{vN}(\rho)$.\\

\epsfxsize=3in
\epsfbox{graph.eps}

A plot of $H_\mathrm{vN}$ and $I_\mathrm{acc}$ for
the ensemble of two pure quantum
states with equal probabilities
that differ by an angle of $\theta$, $0 \leq \theta \leq \pi/2$.

The top curve is the von Neumann entropy 
$H_\mathrm{vN} = H(\frac{1}{2} + \frac{\cos\theta}{2})$
and the bottom the accessible information
$I_\mathrm{acc} = 1 - H(\frac{1}{2} + \frac{\sin\theta}{2})$.


\end{slide}

\begin{slide}
{\large POVM Measurements}\\
(Positive Operator Valued Measurements).

We are given a set of positive semidefinite matrices $E_i$
satisfying $\sum_i E_i = I$.

The probability of the $i$'th outcome is
\[
p_i = \mathrm{Tr} ( E_i \rho)
\]

For von Neumann measurements, $E_i = \Pi_{S_i}$

To obtain the maximum information, we can assume that $E_i$'s are pure
states.  Then $E_i = v_i v_i^\dag$ for some vector $v_i$.
\end{slide}


\begin{slide}
{\large Example 2:}
Three signal states differing by $60^\circ$.

\epsfxsize=1.5in
\hspace*{.5in}
$v_i$: \ \ \epsfbox{three-vecs.eps} \ \ (prob $\frac{1}{3}$)
\[
v_1 = \left(\begin{array}{c}1 \\ 0\end{array}\right)\quad
v_2 = \left(\begin{array}{c}-1/2 \\ \sqrt{3}/2\end{array}\right)\quad
v_3 = \left(\begin{array}{c}-1/2 \\ -\sqrt{3}/2\end{array}\right)
\]

Optimal Measurement:\\
POVM corresponding to vectors $w_i \perp v_i$.\\
$E_i = \frac{2}{3}w_i w_i^\dag$

\epsfxsize=1.5in
\hspace*{.5in}$w_i$:\ \ \epsfbox{dual-vecs.eps}\ \  (prob $\frac{1}{3}$)\\
Each outcome rules out one state, leaving the other two equally likely\\
\hspace*{5ex}$I_\mathrm{acc} = \log 3 - 1 = .585$; $\qquad$
$H_\mathrm{vN} = 1$\\
Again, we have $I_\mathrm{acc} \leq H_\mathrm{vN}$.
\end{slide}

\begin{slide}
{\large Holevo Bound $\chi$}

Suppose we have a source emitting $\rho_i$ with probability $p_i$.

\[
\chi = H_\mathrm{vN}(\sum_i p_i\rho_i) - \sum_i p_i H_\mathrm{vN}(\rho_i)
\]

Theorem (Holevo, 1973)\\
$I_\mathrm{acc} \leq \chi$

If all the $\rho_i$ commute, the situation is essentially classical,
and we get $I_\mathrm{acc} =\chi$.  Otherwise
$I_\mathrm{acc} < \chi$.
\end{slide}

\begin{slide}
Is this the most information we can send using the three states of 
example 2?

{\large Answer: No!}

Use just two of the states, each with probabilities $1/2$
\[
v_1 = \left(\begin{array}{c}1 \\ 0\end{array}\right)\quad
v_2 = \left(\begin{array}{c}-1/2 \\ \sqrt{3}/2\end{array}\right)
\]

The measurement from example 1 now gives $.6454$ bits of information
about the random variable identifying the state which was sent.
\end{slide}

\begin{slide}
Is {\em this} the most information we can send using the three states of 
example 2?

{\large Answer: No!}

Use three codewords $v_1v_1$, $v_2v_2$, $v_3v_3$.

The optimal measurement for these three states gives 
1.369 bits,\\ which is larger than $2 \cdot .6454 = 1.291$.

\epsfxsize=1.3in
\hspace*{1in}\epsfbox{PGM.eps}

What about still longer codewords?

\end{slide}

\begin{slide}
{\large Theorem} (Holevo, Schumacher-Westmoreland)

The classical-information capacity obtainable using codewords composed of
signal states $\rho_i$, where $\rho_i$ has marginal probability $p_i$,
is 

\[
\chi(\{\rho_i\}; \{p_i\}) = H_\mathrm{vN}(\sum_i p_i\rho_i) 
- \sum_i p_i H_\mathrm{vN}(\rho_i)
\]

We will give a sketch of the proof of this formula in the special
case of pure states $\rho_i$.

Does this give the capacity of a quantum channel $\cal{N}$?

Possible capacity formula:\\
Maximize $\chi(\{{\cal{N}}(\rho_i)\}; \{p_i\})$ over all output states 
${\cal N}(\rho)$ of the channel.
\end{slide}

\begin{slide}
Theorem (pure state capacity)

We are given pure quantum states $v_1$, $v_2$, $\ldots$, $v_k$ for use
as signals.  
Let $\rho = \sum_i p_i v_i v_i^\dag$.
There are codes such that we send state $v_i$ with probability $p_i$
having asymptotic capacity $\chi = H_\mathrm{vN} (\rho)$

How do we prove this?

\begin{itemize}
\item random coding
\item typical subspace
\item ``pretty good measurement''\\
 \quad\quad  also called square root measurement
\end{itemize}
\end{slide}

\begin{slide}
Random Coding

We choose codewords
\[
u_i = v_{i_1} \otimes v_{i_2} \otimes \ldots \otimes v_{i_n} 
\]
where $v_i$ is picked with probability $p_i$ for each signal.

Then $u_i$ will be close to the typical subspace of $\rho^{\otimes n}$.

To decode, we 
\begin{itemize}
\item project into the typical subspace
\item apply the ``pretty good measurement''
\end{itemize}
\end{slide}


\begin{slide}
{\large Pretty good measurement}

We have $N$ vectors $\tilde{u}_i \in S$, which occur with equal probability
$\frac{1}{N}$.  Given one of these, we want to distinguish between them.\\
Let $\phi = \sum_i \tilde{u}_i \tilde{u}_i^\dag$

Measure using the POVM with elements 
\[
E_i = \phi^{-1/2} \tilde{u}_i \tilde{u}_i^\dag \phi^{-1/2}
\]
This is a POVM since
\[
\sum_i E_i = \sum_i \phi^{-1/2}\,  \tilde{u}_i \tilde{u}_i^\dag\,  \phi^{-1/2} =I
\]

The probability of error if the state ${u}_i$ is sent is
$1-(\tilde{u}_i^\dag \phi^{-1/2} \tilde{u}_i)^2$.

This can be shown to be small for most $u_i$ from a random code
if $N < \dim S - o(\dim S)$.
\end{slide}


\begin{slide}
Description of arbitrary memoryless quantum channel ${\cal N}$:
${\cal N}$ must be a trace-preserving completely positive operator.

Positive: takes positive semi-definite matrices to 
positive semi-definite matrices.

Completely positive: is positive even when tensored with the identity
channel. (E.g., the transpose operation is positive but not completely
positive).

A trace preserving completely positive operator can always 
be expressed as
\[
\rho \longrightarrow {\cal N}(\rho) = \sum_i A_i \rho A_i^\dag
\]
where 
\[
\sum_i A_i^\dag A_i = I
\]

\end{slide}

\begin{slide}
{\large Unentangled Inputs, Separate Measurements}

\epsfxsize=2.5in
\epsfbox{bigfig1.eps}

Maximize over probability distributions on inputs to
the channel $\rho_i$, $p_i$:
\[
I_{\mathrm{acc}}(\{{\cal N}(\rho_i)\}; \{p_i\}) 
\]
\end{slide}

\begin{slide}
{\large Unentangled Inputs, Joint Measurements}

\epsfxsize=2.5in
\epsfbox{bigfig2.eps}

Maximize over probability distributions on inputs to
the channel $\rho_i$, $p_i$:
\[
\chi(\{{\cal{N}}(\rho_i)\}; \{p_i\})
\]
\end{slide}

\begin{slide}
{\large Entangled Inputs, Joint Measurements}

\epsfxsize=2.5in
\epsfbox{bigfig3.eps}

Maximize over probability distributions on inputs to
the channel $\rho_i$, $p_i$ where $\rho_i$ is in the 
tensor product space of $n$ inputs: 
\[
\lim_{n\rightarrow \infty} \frac{1}{n}\chi(\{{\cal{N}}^{\otimes n}(\rho_i)\}; \{p_i\})
\]

\end{slide}

\begin{slide}
{\large Open Question}

Is channel capacity additive?

Is $
\max \ \chi({\cal N}_1 \otimes {\cal N}_2) = 
\max \ \chi({\cal N}_1) + \max \ \chi({\cal N}_2)$?

If it is, then $\chi$ gives the classical-information
 capacity of a quantum channel.

This turns out to be the same question as additivity of entanglement of
formation considered in the previous lecture.
\end{slide}

\begin{slide}
What things might increase the capacity of a quantum channel which don't
affect the capacity of a classical channel?

\begin{itemize}
\item Entanglement between different channel uses?  Unknown.  This is the
big open additivity question.
\item A classical back channel from the receiver to the sender?  This helps,
but seems to make exact calculation of the capacity impossible.
\item Prior entanglement shared between the sender and the receiver.  
This helps and makes the formulas really nice.
\end{itemize}

\end{slide}

\begin{slide}
Recall superdense coding lets you send two bits per qubit
over a noiseless quantum channel if the sender and receiver share
entanglement. 

\epsfxsize=2in
\epsfbox{superdense-color.eps}
By Holevo's theorem, the bound without prior shared entanglement is
one bit per qubit.  Thus, for the noiseless quantum channel (the simplest
case possible) entanglement between sender and receiver helps.

\end{slide}

\begin{slide}
Suppose that we have a quantum channel ${\cal N}$.  From
superdense coding, if 
${\cal N}$ is a noiseless quantum channel, 
the sender could communicate twice as much classical information to a 
receiver if they share EPR pairs than if they don't.  How does this
generalize to noisy channels?  We call this quantity the 
entanglement-assisted capacity and denote it by $C_E$.

By superdense coding and teleportation, the entanglement-assisted
quantum capacity is exactly half of the entanglement-assisted classical
capacity.
\[
Q_E = \frac{1}{2} C_E\
\]
\end{slide}

\begin{slide}
{\large Formula for entanglement-assisted capacity}

Theorem (Bennett, Shor, Smolin, Thapliyal)\\
\[
C_E = \max_\rho H_\mathrm{vN}( {\cal N} (\rho))
+ H_\mathrm{vN}( \rho)
 - H_\mathrm{vN}( ({\cal N} \otimes I)(\Phi_\rho))
\]
$\Phi_\rho$ is a pure state on the tensor product of the input space
of the channel and a quantum space that the sender keeps, with
\[
{\mathrm{Tr}}_B \Phi_\rho = \rho . 
\]  When the
channel is classical, this formula turns into the entropy of the input
plus the entropy of the output less the entropy of the joint system, 
or the second expression for classical mutual information.  

\end{slide}

\begin{slide}

{\large Generalization}

Suppose that the sender and the receiver have a limited amount of
entanglement ($E$ ebits) they share.  How much capacity can they obtain
from a quantum channel?

If the sender is not allowed to use entanglement between different
channel uses, the answer is:

\[
\max_{\rho_i : \bar{H}(\rho_i) \leq E}
\bar{H}(\rho_i) + H({\cal N}(\bar{\rho_i}))
- \bar{H}( ({\cal N} \otimes I) \Phi_{\rho_i})
\]

Here $\bar{H}$ means average over the entropy, and $\bar{\rho_i}$ means 
average over the state;  $\Phi_{\rho_i}$ is the pure entangled state 
(shared between sender and receiver) whose partial traces are $\rho_i$.  
This formula interpolates between the Holevo-Schumacher-Westmoreland
capacity and the entanglement-assisted capacity.
\end{slide}

\begin{slide}
How to prove the formula for $C_E$ \\(the lower bound)

\[
\max_\rho H_\mathrm{vN}( {\cal{N}} (\rho))
+ H_\mathrm{vN}( \rho)
 - H_\mathrm{vN}( ({\cal N} \otimes I)(\Phi_\rho))
\]

Suppose $\rho$ is $\frac{1}{d}\,\mathrm{Id}$, a multiple of the
identity.  Then we do the same operations as for standard
superdense coding, with the generalizations of the Pauli matrices.

Use Holevo formula for $\chi$:
\[
\chi(\{\rho_i\}; \{p_i\}) = H_\mathrm{vN}(\sum_i p_i\rho_i)
- \sum_i p_i H_\mathrm{vN}(\rho_i)
\]

The first term of $\chi$ gives the first two terms of $C_E$; the
second term of $\chi$ gives the last term of $C_E$.

\end{slide}

\begin{slide}
The Holevo formula for $\chi$:
\[
\chi(\{\rho_i\}; \{p_i\}) = H_\mathrm{vN}(\sum_i p_i\rho_i)
- \sum_i p_i H_\mathrm{vN}(\rho_i)
\]

Recall in superdense coding Alice and Bob share a maximally entangled
state.  Alice applies a random Pauli matrix to her half and sends it
through the channel.

The first term is the entropy of the average state of both halves of 
the maximally entangled state after this operation.  But on average,
the random Pauli matrix turns both halves into maximally mixed states,
turning the first term of $\chi$ into
\[
H_\mathrm{vN}( {\cal{N}} (\rho))
+ H_\mathrm{vN}( \rho)
\]
where $\rho$ is $\frac{1}{d}I$.

\end{slide}
\begin{slide}

Proof sketch if $\rho \ne \frac{1}{d}\,\mathrm{Id}$.

If $\rho$ is a projection matrix, things work the same way as in
the identity case.

If $\rho$ is not a projection matrix, then take tensor product of
$n$ uses of the channel, ${\cal N}^{\otimes n}$, and the projection matrix
$\rho = \pi_T$, where $T$ is a typical subspace for $\rho^{\otimes n}$.

It turns out we need to show that
\[
\lim_{n \rightarrow \infty} 
\frac{1}{n} H_\mathrm{vN}({\cal N}^{\otimes n}(\pi_T))
= H_\mathrm{vN}({\cal N}(\rho)).
\]
This is intuitively true, but not that easy to prove rigorously.

\end{slide}


\begin{slide}
What things might increase the entanglement-assisted
capacity of a quantum channel which don't
affect the capacity of a classical channel?

\begin{itemize}
\item Entanglement between different channel uses?  Does not help!
\item A classical back channel from the receiver to the sender? 
Does not help! 
\item Both of the above simultaneously?  Does not help!
\end{itemize}

Proofs via quantum reverse Shannon theorem (next slide).

\end{slide}


\begin{slide}
Quantum Reverse Shannon Theorem:

In the presence of entanglement, a noiseless qubit channel can 
simulate $n$ uses of any quantum channel with entanglement-assisted
capacity $C_E$ by sending $nC + o(n)$ qubits. 

This conjecture would show that asymptotically, in the presence of 
free entanglement, quantum channels are characterized by one 
parameter, $C_E$.  The analogous theorem is true for classical channels 
in the presence of a correlated source of random bits.  

With Charlie Bennett, Igor Devetak, and Andreas Winter, we have a proof 
of this theorem 
for channels (1) transmitting signals generated by some stochastic source,
or (2) transmitting tensor product states.

Does not appear to be quite true for general inputs, unless we allow
for other forms of shared entanglement than EPR pairs.
\end{slide}

\begin{slide}
Quantum capacity

The quantum capacity is defined as $\lim_{n \rightarrow \infty} \log d/n$,
where $n$ is the number of channel uses in a protocol, and $d$ is the
dimension of the largest Hilbert space which can be transmitted through
the channel such that the fidelity of transmission of 
the (average/lowest fidelity) state in it is $1-\epsilon$, for some fixed
$\epsilon$.  

The quantum capacity of a channel can be shown to be
\[
\lim_{n \rightarrow \infty} \frac{1}{n} \max_\rho 
H( {\cal{N}}^{\otimes n} (\rho))
 - H( ({\cal{N}}^{\otimes n} \otimes I)(\Phi_\rho))
\]
where $\Phi_\rho$ is a state whose reduced state on the input space 
is $\rho$.  


\end{slide}

\end{document}