diff --git a/paper/abstract.tex b/paper/abstract.tex
index bffa9dfec999b086955564bce8738ad52ac88bad..2a88a2724f85c40a094d42b31330bde53e07d6a9 100644
--- a/paper/abstract.tex
+++ b/paper/abstract.tex
@@ -1,7 +1,7 @@
 Gradually typed languages are designed to support both dynamically typed
 and statically typed programming styles while preserving the benefits of
 each.  While existing gradual type soundness theorems for these
-languages aim to show that ``type-based reasoning'' is preserved when
+languages aim to show that type-based reasoning is preserved when
 moving from the fully static setting to a gradual one, these theorems do
 not imply that correctness of type-based refactorings and optimizations
 is preserved.  Establishing correctness of program transformations is
diff --git a/paper/gtt.tex b/paper/gtt.tex
index 97f5d8dae383a82473e79e5ac304cbd3e8c7c6a7..41d286a63f76c48dfd479506cf7b9ff288a5d787 100644
--- a/paper/gtt.tex
+++ b/paper/gtt.tex
@@ -12,8 +12,8 @@
 \newif\ifshort
 \newif\iflong
 %% Pick long or short version:
-\shorttrue
-% \longtrue
+%% \shorttrue
+\longtrue
 
 %% Note: Authors migrating a paper from PACMPL format to traditional
 %% SIGPLAN proceedings format should change 'acmlarge' to
@@ -757,9 +757,11 @@ functions.
 Our modular proof architecture allows us to easily prove correctness
 of $\beta, \eta$ and graduality for all of these interpretations.
 
+\begin{shortonly}
 \textbf{Extended version:} As supplementary materials, we attach an
 extended version of the paper, which has omitted cases of definitions,
 lemmas, and proofs.  
+\end{shortonly}
 
 % While gradual typing has been researched quite extensively by proving
 % safety theorems, these safety theorems are too weak to justify program
@@ -1115,7 +1117,7 @@ function \emph{type} internalizing this judgement), which can be treated
 like values by a compiler because they have no effects (e.g. they can be
 dead-code-eliminated, common-subexpression-eliminated, and so on).
 \begin{longonly}
-In focusing~\cite{andreoli,something-intuitionistic} terms, complex
+In focusing~\cite{andreoli92focus} terminology, complex
 values consist of left inversion and right focus rules.
 \end{longonly}
 For each pattern-matching construct (e.g. case analysis on a sum,
@@ -2790,7 +2792,7 @@ will.
 
 \begin{longproof}~\\
 
-  \begin{itemize}
+  \begin{enumerate}
   \item Sums upcast.  We use Lemma~\ref{lem:upcast} with the type
     constructor $X_1 \vtype, X_2 \vtype \vdash X_1 + X_2 \vtype$.
     Suppose $A_1 \ltdyn A_1'$ and $A_2 \ltdyn A_2'$ and let
@@ -3349,7 +3351,7 @@ will.
     immediate by $\eta$ for 0 on the map $z : 0 \vdash \upcast{0}{A}z :
     A$.
 
-  \end{itemize}
+  \end{enumerate}
 \end{longproof}
 
 \begin{longonly}
@@ -4141,9 +4143,8 @@ The rules for recursive types are in the extended version.
       \text{Both}  & E & \bnfadd & \pmmuXtoYinZ V x E
   \end{array}
   \]
-  \[
-  \begin{array}{l}
-    \inferrule
+  \begin{mathpar}
+  \inferrule
     {\Gamma \vdash V : A[\mu X. A/X]}
     {\Gamma \vdash \rollty{\mu X. A} V : \mu X.A}
     \qquad
@@ -4152,7 +4153,7 @@ The rules for recursive types are in the extended version.
       \Gamma, x : A[\mu X.A/X] \pipe \Delta \vdash E : T
     }
     {\Gamma\pipe\Delta \vdash \pmmuXtoYinZ V x E : T}
-    \qquad
+
     \inferrule
     {\Gamma \mid \Delta \vdash M : \nu \u Y. \u B}
     {\Gamma \mid \Delta \vdash \unroll M : \u B[\nu \u Y. \u B]}
@@ -4180,9 +4181,8 @@ The rules for recursive types are in the extended version.
 
     \framebox{Recursive Type Axioms}
     \medskip
-  \end{array}
-  \]
-  
+  \end{mathpar}
+    
   \begin{tabular}{c|c|c}
     Type & $\beta$ & $\eta$\\
     \hline
@@ -4624,7 +4624,7 @@ This leads to the following definition:
   \[ \texttt{VarArg} = \nu \u Y'. \u Y \with (X \to \u Y') \]
 
   Then we define an open version of $\dynv, \dync$ with respect to a
-  variable representing $\dynv$:
+  variable representing the occurences of $\dynv$ in $\dync$:
   \begin{align*}
     X \vtype \vdash \dynv_o &= \texttt{Tree}[(1+1) + U \dync_o] \ctype\\
     X \vtype \vdash \dync_o &= \texttt{VarArg}[\u F X/\u Y] \ctype\\
@@ -4693,24 +4693,23 @@ variable arity functions!
 
 What's least clear is \emph{why} the type
 \[
-\texttt{VarArg}[X][Y] = \nu \u Y'. (X \to \u Y') \with \u Y
+\texttt{VarArg}[X][\u Y] = \nu \u Y'. (X \to \u Y') \with \u Y
 \]
 Should be thought of as a type of variable arity functions.
 %
 First consider the infinite unrolling of this type:
 \[
-\texttt{VarArg}[X][Y] \simeq \u Y \with (X \to \u Y) \with (X \to X \to \u Y) \with \cdots
+\texttt{VarArg}[X][\u Y] \simeq \u Y \with (X \to \u Y) \with (X \to X \to \u Y) \with \cdots
 \]
 this says that a term of type $\texttt{VarArg}[X][Y]$ offers an
 infinite number of possible behaviors: it can act as a function from
-$X^n \to Y$ for any $n$.
+$X^n \to \u Y$ for any $n$.
 %
 Similarly in Scheme, a function can be called with any number of
 arguments.
 %
 Finally note that this type is isomorphic to a function that takes a
 \emph{cons-list} of arguments:
-
 \begin{align*}
   &\u Y \with (X \to \u Y) \with (X \to X \to \u Y) \with \cdots\\
   &\cong(1 \to \u Y) \with ((X \times 1) \to \u Y) \with ((X \times X \times 1) \to \u Y) \with \cdots\\
@@ -4763,7 +4762,7 @@ in the ep pairs used in Definition~\ref{def:scheme-like-type-interp}.
   \begin{longonly}
     \\
   \inferrule
-  {}
+  { }
   {\Gamma \vdash \tru, \fls : \bool}
 
   \inferrule
@@ -4772,6 +4771,7 @@ in the ep pairs used in Definition~\ref{def:scheme-like-type-interp}.
     \Gamma \vdash E_f : T}
   {\Gamma \pipe \Delta \vdash \ifXthenYelseZ V {E_t} {E_f} : T}
 
+  \\
   \ifXthenYelseZ \tru {E_t} {E_f} \equidyn E_t\and
   \ifXthenYelseZ \fls {E_t} {E_f} \equidyn E_f\\
   x : \bool \vdash E \equidyn \ifXthenYelseZ x {E[\tru/x]} {E[\fls/x]}\\
@@ -4978,7 +4978,7 @@ interpretation.
       \thunk\\
       &&\pairone{\force \supcast{\u B_1}{\u B_1'}{(\thunk \pi\force x)}}\\
       &&\pairtwo{\force \supcast{\u B_2}{\u B_2'}{(\thunk \pi'\force x)}}\\
-      \bullet \vdash \sdncast{A \to \u B}{A' \to \u B'} &=& \lambda x:A. \sdncast{\u B}{\u B'}{(\bullet\, x)}\\
+      \bullet \vdash \sdncast{A \to \u B}{A' \to \u B'} &=& \lambda x:A. \sdncast{\u B}{\u B'}{(\bullet\, (\supcast{A}{A'}{x}))}\\
       f : U(\sem{A} \to \sem{\u B}) \vdash
       \supcast{U(A \to \u B)}{U(A' \to \u B')}
       &=&
@@ -5322,7 +5322,7 @@ can prove that all casts as defined in Figure~\ref{fig:cast-to-contract}
 are ep pairs.  Before doing so, we prove the following lemma, which is
 used for transitivity (e.g. in the $A \ltdyn \dynv$ rule, which uses a
 composition $A \ltdyn \floor{A} \ltdyn \dynv$):
-\begin{lemma}[EP Pairs Compose]
+\begin{lemma}[EP Pairs Compose]\hfill
   \label{lem:ep-pairs-compose}
   \begin{enumerate}
   \item If $(V_1, S_1)$ is a value ep pair from $A_1$ to $A_2$ and
@@ -5370,7 +5370,7 @@ Now, we show that all casts are ep pairs.
 %
 The proof is a somewhat tedious, but straightforward calculation.
 
-\begin{lemma}[Casts are EP Pairs]
+\begin{lemma}[Casts are EP Pairs]\hfill
   \label{lem:casts-are-ep-pairs}
   \begin{enumerate}
   \item For any $A \ltdyn A'$, the casts $(x.\sem{\upcast{A}{A'}x},
@@ -5440,7 +5440,7 @@ The proof is a somewhat tedious, but straightforward calculation.
         \bindXtoYinZ \bullet x' \caseofXthenYelseZ {x'} {x_1'. \ret\inl x_1'} {x_2'. \ret \inr x_2'}\tag{IH projection}\\
         &\equidyn \bindXtoYinZ \bullet x' \ret x'\tag{$+\eta$}\\
         &\equidyn \bullet \tag{$\u F\eta$}\\
-      \end{align*}
+      \end{align*}\
     \end{enumerate}
   \item $\times$:
     \begin{enumerate}
@@ -5835,17 +5835,17 @@ which says that a GTT homogeneous term dynamism is the same as a
 \end{lemma}
 \begin{proof}
   We proceed by induction on $A, \u B$, following the proof that
-  reflexivity is admissible given in \ref{lem:norm-type-dyn}.
+  reflexivity is admissible given in Lemma \ref{lem:norm-type-dyn}.
   \begin{enumerate}
   \item If $A \in \{1, \dynv \}$, then $\supcast{A}{A}[x] = x$.
   \item If $A = 0$, then $\absurd x \equidyn x$ by $0\eta$.
   \item If $A = U \u B$, then by inductive hypothesis $\sdncast{\u
-    B}{\u B} \equidyn \bullet$. By lemma \ref{ep-pair-id},
+    B}{\u B} \equidyn \bullet$. By Lemma \ref{ep-pair-id},
     $(x. x, \bullet)$ is a computation ep pair from $\u B$ to
-    itself. But by \ref{lem:casts-are-ep-pairs}, $(\supcast{U\u
+    itself. But by Lemma \ref{lem:casts-are-ep-pairs}, $(\supcast{U\u
       B}{U\u B}[x], \bullet)$ is also a computation ep pair so the
     result follows by uniqueness of embeddings from computation
-    projections \ref{lem:adjoints-unique-cbpvstar}.
+    projections Lemma \ref{lem:adjoints-unique-cbpvstar}.
   \item If $A = A_1\times A_2$ or $A = A_1+A_2$, the result follows by
     the $\eta$ principle and inductive hypothesis.
   \item If $\u B = \dync$, $\sdncast{\dync}{\dync} = \bullet$.
@@ -6028,7 +6028,7 @@ The final lemma before the graduality theorem lets us ``move a cast''
 from left to right or vice-versa, via the adjunction property for ep
 pairs.  
 %
-These arise in the proof cases for return and thunk, because in those
+These arise in the proof cases for $\kw{return}$ and $\kw{thunk}$, because in those
 cases the inductive hypothesis is in terms of an upcast (downcast) and
 the conclusion is in terms of a a downcast (upcast).
 \begin{lemma}[Hom-set formulation of Adjunction]
@@ -6595,12 +6595,12 @@ derivation that uses complex stacks to one that uses only ``simple''
 stacks without pattern-matching and computation introduction forms.
 %
 \begin{longonly}
-  Stacks do not appear anywhere in the grammar of terms, they are
+  Stacks do not appear anywhere in the grammar of terms, but they are
 used in the equational theory (computation $\eta$ rules and error
-strictness.)
+strictness).
 \end{longonly}
 %
-This translation clarifies the behavioral meaning of complex values and
+\ This translation clarifies the behavioral meaning of complex values and
 stacks, following \cite{munchmaccagnoni14nonassociative,
   fuhrmann1999direct}, and therefore of upcasts and downcasts.
 \begin{longonly}
@@ -7113,11 +7113,11 @@ We need a few lemmas about thunkables and linears to prove that complex
 values become thunkable and complex stacks become linear.
 
 First, the following lemma is useful for optimizing programs with
-thunkable subterms.
-Intuitively, since a thunkable has ``no effects'' it can be reordered
-past any other effectful binding.
-Furhmann \citep{fuhrmann1999direct} calls a morphism that has this
-property \emph{central}.
+thunkable subterms.  Intuitively, since a thunkable has ``no effects''
+it can be reordered past any other effectful binding.  Furhmann
+\citep{fuhrmann1999direct} calls a morphism that has this property
+\emph{central} (after the center of a group, which is those elements
+that commute with every element of the whole group).
 \begin{lemma}[Thunkable are Central]
   If $\Gamma \vdash M : \u F A$ is thunkable and $\Gamma \vdash N : \u
   F A'$ and $\Gamma , x:A, y:A' \vdash N' : \u B$, then
@@ -7466,7 +7466,7 @@ composition.
     \Delta \vdash \simp E \ltdyn \simp{E'} : \simp T}$
 \end{theorem}
 \begin{longproof}
-  $\beta$ axioms need to reduce administrative redices ugh
+  %% $\beta$ axioms need to reduce administrative redices ugh
   \begin{enumerate}
   \item Reflexivity is translated to reflexivity.
   \item Transitivity is translated to transitivity.
@@ -7725,7 +7725,7 @@ write $M \step N$ with no index when the index is irrelevant.)
 \begin{lemma}[Progress]
   If $\cdot \vdash M : \u F A$ then one of the following holds:
   \begin{mathpar}
-    M = \err \and M = \ret V \text{with} V:A \and \exists M'.~ M \step M
+    M = \err \and M = \ret V \text{with} V:A \and \exists M'.~ M \step M'
   \end{mathpar}
 \end{lemma}
 \fi
@@ -7782,7 +7782,7 @@ with the other in any program text produces the same overall resulting
 computation.
 \end{longonly}
 %
-Define a context $C$ to be a term/value/stack with a single $[\cdot]$ as
+\ Define a context $C$ to be a term/value/stack with a single $[\cdot]$ as
 some subterm/value/stack, and define a typing $C : (\Gamma \vdash \u B)
 \Rightarrow (\Gamma' \vdash \u B')$ to hold when for any $\Gamma \vdash
 M : \u B$, $\Gamma' \vdash C[M] : \u B'$ (and similarly for
@@ -7798,13 +7798,19 @@ values/stacks).  Using contexts, we can lift any relation on
 \begin{small}
   \begin{mathpar}
     \begin{array}{rcl}
-    C_V  & ::= [\cdot] & \rollty{\mu X.A}C_V \mid \inl{C_V} \mid \inr{C_V} \mid \\
-       & & (C_V,V)\mid(V,C_V)\mid \thunk{C_M}
+    C_V  & ::= [\cdot] & \rollty{\mu X.A}C_V \mid \inl{C_V} \mid \inr{C_V} \mid (C_V,V)\mid(V,C_V)\mid \thunk{C_M}\\
     \\
-    C_M & ::= & [\cdot] \mid \letXbeYinZ {C_V} x M \mid \letXbeYinZ V x C_M \mid \pmmuXtoYinZ {C_V} x M \mid\pmmuXtoYinZ V x C_M \mid \rollty{\nu \u Y.\u B} C_M \mid \unroll C_M \mid \abort{C_V} \mid \\
-    & & \caseofXthenYelseZ {C_V} {x_1. M_1}{x_2.M_2} \mid\caseofXthenYelseZ V {x_1. C_M}{x_2.M_2} \mid\caseofXthenYelseZ V {x_1. M_1}{x_2.C_M} \mid \pmpairWtoinZ {C_V} M \mid \pmpairWtoinZ V C_M \mid \pmpairWtoXYinZ {C_V} x y M\mid \pmpairWtoXYinZ V x y C_M
+    C_M & ::= & [\cdot] \mid \letXbeYinZ {C_V} x M \mid \letXbeYinZ V x
+    C_M \mid \pmmuXtoYinZ {C_V} x M \mid\pmmuXtoYinZ V x C_M \mid \\
+    & & \rollty{\nu \u Y.\u B} C_M \mid \unroll C_M \mid \abort{C_V} \mid \\
+    & & \caseofXthenYelseZ {C_V} {x_1. M_1}{x_2.M_2}
+    \mid\caseofXthenYelseZ V {x_1. C_M}{x_2.M_2} \mid\caseofXthenYelseZ
+    V {x_1. M_1}{x_2.C_M} \mid\\
+    & & \pmpairWtoinZ {C_V} M \mid \pmpairWtoinZ V C_M \mid \pmpairWtoXYinZ {C_V} x y M\mid \pmpairWtoXYinZ V x y C_M
     \mid \force{C_V} \mid \\
-    & & \ret{C_V} \mid \bindXtoYinZ{C_M}{x}{N} \mid\bindXtoYinZ{M}{x}{C_M} \mid \lambda x:A.C_M \mid C_M\,V \mid M\,C_V \mid \pair{C_M}{M_2}\mid \pair{M_1}{C_M} \mid \pi C_M \mid \pi' C_M
+    & & \ret{C_V} \mid \bindXtoYinZ{C_M}{x}{N}
+    \mid\bindXtoYinZ{M}{x}{C_M} \mid \lambda x:A.C_M \mid C_M\,V \mid M\,C_V \mid\\
+    & & \pair{C_M}{M_2}\mid \pair{M_1}{C_M} \mid \pi C_M \mid \pi' C_M
     \\
     C_S &=& \pi C_S \mid \pi' C_S \mid S\,C_V\mid C_S\,V\mid \bindXtoYinZ {C_S} x M \mid \bindXtoYinZ S x C_M
     \end{array}
@@ -7848,7 +7854,7 @@ $\diverge \preceq R$ lifts to the standard notion of \emph{divergence
 \end{shortonly}
 
 \begin{longonly}
-The most famous application of this is to observational equivalence,
+The most famous use of lifting is for observational equivalence,
 which is the lifting of equality of results ($\ctxize=$), and we will show that
 $\equidyn$ proofs in GTT imply observational equivalences.
 %
@@ -7936,7 +7942,7 @@ The goal of this section is to prove that a symmetric equality $E \equidyn
 E'$ in CBPV (i.e. $E \ltdyn E'$ and $E' \ltdyn E$) implies contextual
 equivalence $E \ctxize= E'$ and that inequality in CBPV $E \ltdyn E'$
 implies error approximation $E \ctxize\ltdyn E'$, which will give operational
-graduality.
+graduality:
 \begin{longonly}
 \begin{small}
 \begin{mathpar}
@@ -8063,14 +8069,16 @@ $\wedge$ for intersection ($x (\sim \wedge \sim') y$ iff $x \sim y$ and $x \sim'
 \begin{lemma}[Contextual Decomposition Lemma] \label{lem:contextual-decomposition}
 Let $\sim$ be a reflexive relation $(= \Rightarrow \sim)$, and $\leqslant$
 be a reflexive, antisymmetric relation (${=} \Rightarrow {\leqslant}$ and
-$(\preceq \wedge {\leqslant^\circ}) \Leftrightarrow {=}$).  Then
+$(\leqslant \wedge {\leqslant^\circ}) \Leftrightarrow {=}$).  Then
 \[
 \ctxize\sim \Leftrightarrow \ctxize{(\sim \vee \leqslant)} \wedge (\ctxize{(\sim^\circ \vee \leqslant)})^\circ
 \]
 \end{lemma}
 
 \begin{proof}
-Reflexive relations form a lattice? with $\wedge$ and $\vee$ with $=$ as
+Note that despite the notation, $\leqslant$ need not be assumed to be
+transitive.  
+Reflexive relations form a lattice with $\wedge$ and $\vee$ with $=$ as
 $\bot$ and the total relation as $\top$ (e.g. $(= \vee \sim)
 \Leftrightarrow \sim$ because $\sim$ is reflexive, and $(= \wedge \sim)
 \Leftrightarrow =$).  So we have
@@ -8102,6 +8110,9 @@ So using Lemmas~\ref{lem:ctx-commutes-conjunction}, \ref{lem:ctx-commutes-dual}
 \]
 \end{proof}
 
+As a corollary, the decomposition of contextual equivalence into diverge
+approximation in \cite{ahmed06:lr} and the decomposition of dynamism in
+\cite{newahmed18} are really the same trick:
 \begin{corollary}[Contextual Decomposition] ~~~ \label{cor:contextual-decomposition}
   \begin{enumerate}
   \item $\ctxize= \mathbin{\Leftrightarrow} \ctxize{\preceq} \wedge
@@ -8111,9 +8122,6 @@ So using Lemmas~\ref{lem:ctx-commutes-conjunction}, \ref{lem:ctx-commutes-dual}
   \end{enumerate}
 \end{corollary}
 \begin{proof}
-The decomposition of contextual equivalence into
-diverge approximation in \cite{ahmed06:lr} and the decomposition of
-dynamism in \cite{newahmed18} are really the same trick.
 
 For part 1 (though we will not use this below), applying
 Lemma~\ref{lem:contextual-decomposition} with $\sim$ taken to be $=$
@@ -8229,7 +8237,7 @@ Then we establish a few basic properties of the finitized preorder.
 \begin{lemma}[Downward Closure of Finitized Preorder]
   If $M \ix\apreorder i R$ and $j\leq i$ then $M \ix \apreorder j R$.
 \end{lemma}
-\begin{longproof}
+\begin{longproof} \hfill
   \begin{enumerate}
   \item If $M \bigstepsin{i} M_i$ then $M \bigstepsin{j} M_j$ and otherwise 
   \item If $M \bigstepsin{j \leq k i} \result(M)$ then $M \bigstepsin{j} M_j$
@@ -8313,7 +8321,7 @@ relation downward closed.
 \end{shortonly}
 
 \begin{longonly}
-Next, we define the \emph{logical} preorder by induction on types and
+Next, we define the (closed) \emph{logical} preorder (for closed values/stacks) by induction on types and
 the index $i$ in figure \ref{fig:lr}.
 %
 Specifically, for every $i$ and value type $A$ we define a relation
@@ -8658,15 +8666,18 @@ Next, we show the fundamental theorem:
 \end{longproof}
 
 \begin{longonly}
-As a direct consequence we get the reflexivity of the relation.
+As a direct consequence we get the reflexivity of the relation
 \begin{corollary}[Reflexivity]
   For any $\Gamma \vdash M : \u B$, and $i \in \mathbb{N}$,
   \(\Gamma \vDash M \ilrof\apreorder i  M \in \u B.\)
 \end{corollary}
 \end{longonly}
 
-This in particular implies that the relation is reflexive ($\Gamma
-\vDash M \ilrof\apreorder i M \in \u B$ for all well-typed $M$), so we
+\begin{shortonly}
+  This in particular implies that the relation is reflexive ($\Gamma
+  \vDash M \ilrof\apreorder i M \in \u B$ for all well-typed $M$),
+\end{shortonly}
+so we
 have the following \emph{strengthening} of the progress-and-preservation
 type soundness theorem: because $\ix\apreorder i$ only counts unrolling
 steps, terms that never use $\mu$ or $\nu$ types (for example) are
@@ -9391,7 +9402,6 @@ a semantic analogue as well.
 \begin{longonly}
   \paragraph{Gradual Session Types} ~
 \end{longonly}
-
 Gradual session types~\cite{igarashi+17gradualsession} share some
 similarities to GTT, in that there are two sorts of types (values and
 sessions) with a dynamic value type and a dynamic session type.
diff --git a/paper/max.bib b/paper/max.bib
index a2cdb49488e24977d9a10abf02f560d0580ceaed..96f35bd57ee3bd8cd2b46135dfd861777283fab7 100644
--- a/paper/max.bib
+++ b/paper/max.bib
@@ -1175,3 +1175,13 @@ booktitle="Foundations of Software Science and Computation Structures",
 year="2016",
 pages="36--54",
 }
+
+@article{andreoli92focus,
+  author = {Jean-Marc Andreoli},
+  title = {Logic programming with focusing proofs in linear logic},
+  journal = {Journal of Logic and Computation},
+  year = {1992},
+  volume = 2,
+  number = 3,
+  pages = {297--347},
+}