diff --git a/paper/gtt.tex b/paper/gtt.tex
index 4278ebad90f44378dbdd833e51844967a5bde7a2..83edaaf3442cff0f109b3d92704c0f5bd962de0d 100644
--- a/paper/gtt.tex
+++ b/paper/gtt.tex
@@ -12,8 +12,8 @@
 \newif\ifshort
 \newif\iflong
 %% Pick long or short version:
-%% \shorttrue
-\longtrue
+\shorttrue
+% \longtrue
 
 %% Note: Authors migrating a paper from PACMPL format to traditional
 %% SIGPLAN proceedings format should change 'acmlarge' to
@@ -24,6 +24,8 @@
 \usepackage{tikz-cd}    % for diagrams
 \usepackage{environ}    % for long/short environments
 
+\usepackage{xcolor}
+
 % \usepackage{booktabs}   %% For formal tables:
 %                         %% http://ctan.org/pkg/booktabs
 % \usepackage{subcaption} %% For complex figures with subfigures/subcaptions
@@ -225,7 +227,7 @@
 \newcommand{\obcast}[2]{\langle{#1}\Leftarrow{#2}\rangle}
 \newcommand{\err}{\mho}
 \newcommand{\diverge}{\Omega}
-\newcommand{\print}{\text{print}\,\,}
+\newcommand{\print}{\kw{print}\,\,}
 \newcommand{\roll}{\kw{roll}}
 \newcommand{\rollty}[1]{\texttt{roll}_{#1}\,\,}
 \newcommand{\unroll}{\kw{unroll}}
@@ -857,6 +859,243 @@ of $\beta, \eta$ and graduality for all of these interpretations.
 
 \section{Axiomatic Gradual Type Theory}
 
+\begin{figure}
+  \[
+  \begin{array}{l}
+  \begin{array}{l l}
+    A ::= \colorbox{lightgray}{$\dynv$} \mid U \u B \mid 0 \mid A_1 + A_2 \mid 1 \mid A_1 \times A_2 & 
+    \u B  ::= \colorbox{lightgray}{$\dync$} \mid \u F A \mid \top \mid \u B_1 \with \u B_2 \mid A \to \u B\\
+    \Gamma ::= \cdot \mid \Gamma, x : A & 
+    \Delta  ::= \cdot \mid \bullet : \u B \\
+    \colorbox{lightgray}{$\Phi  ::= \cdot \mid \Phi, x \ltdyn x': A \ltdyn A'$} &
+    \colorbox{lightgray}{$\Psi  ::= \cdot \mid \bullet \ltdyn \bullet : \u B \ltdyn \u B'$} \\  
+    T ::= A \mid \u B & 
+    E ::= V \mid M  \\
+    \end{array}\\
+ \begin{array}{lcl}
+    V  & ::= & \colorbox{lightgray}{$\upcast A {A'} V$} \mid x \mid \abort{V} 
+               \mid \inl{V} \mid \inr{V} \mid \caseofXthenYelseZ V {x_1. V_1}{x_2.V_2} \mid \\
+       & & () \mid \pmpairWtoinZ V V' \mid (V_1,V_2) \mid \pmpairWtoXYinZ V x y V' \mid \thunk{M}
+    \\
+    M,S & ::= & \colorbox{lightgray}{$\dncast{\u B} {\u B'} M$} \mid \bullet \mid \err_{\u B} \mid \letXbeYinZ V x M \mid \abort{V} \mid \\
+    & & \caseofXthenYelseZ V {x_1. M_1}{x_2.M_2} \mid \pmpairWtoinZ V M \mid \pmpairWtoXYinZ V x y M
+    \mid \force{V} \mid \\
+    & & \ret{V} \mid \bindXtoYinZ{M}{x}{N} \mid \lambda x:A.M \mid M\,V \mid \emptypair \mid \pair{M_1}{M_2} \mid \pi M \mid \pi' M
+  \end{array}
+  \end{array}
+  \]
+  \vspace{-0.2in}
+  \caption{GTT Syntax}
+  \label{fig:gtt-syntax}
+\end{figure}
+
+\subsection{Call-by-Push-Value}
+
+Because the extensionality principles for types play a key role in our
+analysis of gradual typing, it is necessary to work in a setting where
+we can have extensionality principles for both data (e.g. sum types) and
+computations (e.g. functions).  One such setting is Levy's
+Call-by-Push-Value~\citep{levyXXcbv-book} (CBPV).  The syntax of CBPV is
+the unshaded rules in Figure~\ref{fig:gtt-syntax}, and the term typing
+rules for CBPV are the rules for these constructs in
+Figure~\ref{fig:gtt-type-dynamism-and-terms}.  We briefly review the
+basic concepts of CBPV; see ~\cite{levybook,eff,frank?,...}  for more
+details.  CBPV makes a distinction between \emph{value types} $A$ and
+\emph{computation types} $\u B$, where value types classify
+\emph{values} $\Gamma \vdash V : A$ and computation types classify
+\emph{computations} $\Gamma \vdash M : \u B$.
+
+\paragraph{Value types}
+The value types include \emph{eager} products $A_1 \times A_2$ and sums
+$A_1 + A_2$, which behave as in a call-by-value/eager language (e.g. a
+pair is only a value when its components are).  The notion of value $V$
+is more permissive than one might expect, and expressions $\Gamma \vdash
+V : A$ are sometimes called \emph{complex values} to emphasize this
+point: complex values include not only closed runtime values, but also
+open values that have free value variables (e.g. $x : A_1 , x_2 : A_2
+\vdash (x_1,x_2) : A_1 \times A_2$), and expressions that pattern-match
+on values (e.g. $p : A_1 \times A_2 \vdash
+\pmpairWtoXYinZ{p}{x_1}{x_2}{(x_2,x_1)} : A_2 \times A_1$).  Thus, the
+complex values $x : A \vdash V : A'$ are a syntactic class of ``pure
+functions'' from $A$ to $A'$ (though there is no pure function type
+internalizing this judgement), which can be treated like values by a
+compiler (e.g. they can duplicated, reordered,
+common-subexpression-eliminated because they have no effects).  In
+focusing~\cite{andreoli,something-intuitionistic} terms, complex values
+consist of left inversion and right focus rules.  Complex values are
+helpful for stating our results but are not strictly necessary; we show
+how to eliminate them in Section~\ref{sec:complex}. For each
+pattern-matching construct (case analysis on a sum, splitting a pair),
+we have both a case-analysis whose branches are values
+(e.g. $\caseofXthenYelseZ{s}{x_1.V_1}{x_2.V_2}$) and one whose branches
+are computations (e.g. $\caseofXthenYelseZ{s}{x_1.M_1}{x_2.M_2}$).  To
+abbreviate the typing rules for both in
+Figure~\ref{fig:gtt-type-dynamism-and-terms}, we use the following
+convention: we write $E$ for either a complex value or a computation,
+and $T$ for either a value type $A$ or a computation type $\u B$, and a
+judgement $\Gamma \mid \Delta \vdash E : T$ for either $\Gamma \vdash V
+: A$ or $\Gamma \mid \Delta \vdash M : \u B$ (this is a bit of an abuse
+of notation because $\Delta$ is not present in the former).
+
+\paragraph{Computation types}
+The computation types include lazy unit/products $\top$ and $\u B_1
+\with \u B_2$, which behave like in a call-by-name/lazy language (e.g. a
+component of a lazy pair is evaluated only when it is projected).
+Functions $A \to \u B$ have a value type as input and a computation type
+as a result.  We include the CBPV \emph{stack} judgement $\Gamma \mid
+\bullet : \u B \vdash S : \u B'$, which defines a syntactic class of
+linear/strict functions from $\u B$ to $\u B'$---a stack $S$ \emph{must}
+use its input ``hole'' $\bullet$ exactly once.  Just as the complex
+values $V$ are a syntactic class terms that have no effects, the stack
+judgement $S$ is a syntactic class of terms that reflect \emph{all}
+effects of their input.  Consequently, stacks satisfy number of
+equations that are useful for refactorings or in a compiler: For
+example, if $\err$ is an erroring computation, we will have a contextual
+equivalence $S[\err/\bullet] = \err$, and if $\print V;M$ prints $V$ and
+then runs $M$, we will have $S[\print V;M] = \print
+V;S[M/\bullet]$---i.e. effects can be hoisted out of stacks, because we
+know the stack will run them exactly once.  Just as complex values
+include pattern-matching, \emph{complex stacks} include pattern-matching
+on values and introduction forms for the stack's output type.  For
+example, $\bullet : \u B_1 \with \u B_2 \vdash \pair{\pi' \bullet}{\pi
+  \bullet} : \u B_2 \with \u B_1$ is a complex stack, even though it
+mentions $\bullet$ more than once, because running the computation
+requires choosing a projection, so \emph{each time it is run} it uses
+$\bullet$ exactly once.  The equational theory of effects in
+call-by-name and CBPV evaluation may at first be a bit surprising to
+those familiar with call-by-value; for example, as a consequence of
+$S[\print V;M\bullet] = \print V;S[M/\bullet]$ with the stack $\lambda
+x.\bullet$, we have $\print c; \lambda x. M = \lambda x.\print c; M$,
+intuitively because the printing only happens when the whole computation
+is forced.  This does \emph{not} imply that the corresponding equation
+holds for the call-by-value function type, which we discuss below.  In
+focusing terms, complex stacks include both left and right inversion,
+and left focus rules.  To compress the presentation, we use a typing
+judgement $\Gamma \mid \Delta \vdash M : \u B$ with a
+``stoup''~\citep{girard?}, i.e. a typing context $\Delta$ that is either
+empty or contains exactly one assumption $\bullet : \u B$, where $\Gamma
+\mid \cdot \vdash M : \u B$ is a computation, while $\Gamma \mid \bullet
+: \u B \vdash M : \u B'$ is a stack.  The typing rules for $\top$ and
+$\with$ treat the stack input variable additively (in linear logic
+terminology); functions are ordinarily multiplicative, but here, the
+stack input must occur in the function position of a function
+application, because functions are applied to values, with the
+sequencing of computations made explicit.
+
+\paragraph{Shifts}
+A central notion in CBPV is the \emph{shifts} $\u F$ and $U$, which
+mediate between value and computation types: $\u F A$ is the computation
+type of potentially effectful programs that return a value of type $A$,
+while $U \u B$ is the value type of thunked computations of type $\u B$.
+The introduction and elimination rules for $U$ are written \kw{thunk}
+and \kw{force}, and say that computations of type $\u B$ are bijective
+with values of type $U \u B$.  Note that $\force{V}$ is the prototypical
+non-stack computation ($\Delta$ is required to be empty), because
+forcing some thunk does not use the stack input.  The rules for $\u F A$
+types allow returning a value of type $A$, or sequencing a computation
+of an $\u F A$ with a computation $x : A \vdash M : \u B$ to produce a
+computation of a $\u B$.  In the equational theory of CBPV, $\u F$ and
+$U$ are \emph{adjoint}, in the sense that stacks $\bullet : \u F A
+\vdash S : \u B$ are bijective with values $x : A \vdash V : U \u B$, as
+both are bijective with computations $x : A \vdash M : \u B$.  It is
+instructive to consider what the shifts do to the units: $0$ is the
+empty value type, while $\u F 0$ classifies effectful computations that
+never return, but may perform effects, while $U \u F 0$ is the value
+type where such computations are delayed as value.  $1$ is the trivial
+value type, and $\u F 1$ is the type of computations that can perform
+effects and then ``terminate successfully'' by returning $()$, and $U \u
+F 1$ is the value type where such computations are delayed values.
+$\top$ is the trivial computation type, in which \emph{all} effectful
+computations are equal (e.g. surprisingly, printing and not printing are
+considered equivalent computations in $\top$), because a computation of
+type $\top$ can never be forced to run its effects: forcing a
+computation means running it on a stack whose output is $\u F X$ for
+some $X$, and there are no stacks from $\top$.  $U \top$ is isomorphic
+to $1$, because it is also a value type with exactly one element, so $\u
+F U \top \cong U 1$ also represents computations where different effects
+are distinguished which can terminate successfully.
+
+\paragraph{Embedding call-by-value and call-by-name}
+To translate call-by-value into CBPV, a judgement $x_1 : A_1, \ldots,
+x_n : A_n \vdash e : A$ is interpreted as a computation $x_1 : A_1,
+\ldots, x_n : A_n \vdash e^* : \u F A$, where call-by-value products and
+sums are interpreted as $\times$ and $+$, and the call-by-value function
+type $A \to A'$ as $U(A \to \u F A')$.  Thus, a call-by-value term $e :
+A \to A'$ is translated to a computation of type $e^* : \u F U (A \to \u
+F A')$, where comonad $\u F U$ offers an opportunity to perform effects
+\emph{before} returning a function value, in addition to effects that
+are performed when the function is applied (so under transalation the
+CBV terms $\print c; \lambda x. M$ and $\lambda x.\print c; M$ will not
+be contextually equivalent).  To translate call-by-name to CBPV, a
+judgement $x_1 : \u B_1, \ldots, x_n : \u B_n \vdash e : \u B$ is
+translated to $x_1 : U \u B_1, \ldots, x_n : U \u B_n \vdash e_* : \u
+B$, representing the fact that call-by-name terms are passed thunked
+arguments.  Products are translated to $\top$ and $\times$, while a CBN
+function $B \to B'$ is translated to $U \u B \to \u B'$ with a thunked
+argument.  Sums $B_1 + B_2$ are translated to $\u F (U \u B_1 + U \u
+B_2)$, making the ``lifting'' in lazy sums explicit.
+
+%% While these rules are very intuitive when the input and output types
+%% of the stack are $\u F A$ for some value type (which is always the
+%% case in call-by-value), when we have stacks between computation types
+%% that are not $\u F A$, we can observe this property to be quite
+%% strong.
+
+%% For instance, consider a stack $A \to \u B \vdash S : \u F A$ between
+%% a function type and a $\u F$ree value type.
+%% %
+%% Consider what happens when we apply the stack to a function that
+%% prints.
+%% %
+
+%% \begin{align*}
+%%   S[\lambda x : A. \print c; M] &\equidyn S[\print c; \lambda x:A. M]\\
+%%   &\equidyn \print c; S[\lambda x:A. M] \\
+%% \end{align*}
+
+%% This shows us that a stack out of a function type \emph{calls the
+%%   function exactly once}.
+%% %
+
+%% %% Generalizing further, a stack out of an iterated function type $A_1
+%% %% \to A_2 \to \u B$ \emph{supplies all of the arguments}
+%% %% if the conclusion of the stack is an F?
+
+
+\paragraph{Extensionality/$\eta$ Principles}
+CBPV accounts for the $\eta$/extensionality principles of both data
+(positive) and codata (negative) types, because data types will have
+$\eta$ principles relating them to the value assumptions in the context
+$\Gamma$, while codata types will have $\eta$ principles relating them
+to the result type of a computation.  This is the main advantage of CBPV
+over either CBV or CBN for our purposes: it has enough judgements to
+express both kinds of extensionality rules.  For example, the
+extensionality principles for sums says that any complex value or
+computation $x : A_1 + A_2 \vdash E : T$ is equivalent to
+$\case{x}{x_1.E[\inl{x_1}/x]}{x_2.E[\inr{x_2}/x]}$, i.e. a case on a
+value can be moved around a program without changing its behavior.
+Given this, the above translations of CBV and CBN into CBPV explain why
+extensionality for sums holds in CBV but not CBN: in CBV, $x : A_1 + A_2
+\vdash E : T$ is translated to a term with $x : A_1 + A_2$ free, but in
+CBN, $x : B_1 + B_2 \vdash E : T$ is translated to a term with $x : U \u
+F(U \u B_1 + U \u B_2)$ free, and a monadic computation that returns a
+sum does not satisfy the $\eta$ principle for sums in CBPV.  Dually, the
+$\eta$ principle for functions in CBPV is that any computation $M : A
+\to \u B$ is equal to $\lambda x.M \, x$.  A CBN term $e : B \to B'$ is
+translated to a CBPV computation of type $U \u B \to \u B'$, to which
+CBPV function extensionality applies, while a CBV term $e : A \to A'$ is
+translated to a computation of type $\u F U(A \to \u F A')$, which does
+not satisfy the $\eta$ rule for functions.
+
+%% \begin{mathpar}
+%%   \inferrule
+%%   {\Gamma \pipe \u B \vdash S : \u C}
+%%   {\Gamma \vdash S[\err] \equidyn \err}
+%%   \inferrule
+%%   {\Gamma \pipe \u B \vdash S : \u C \and \Gamma \vdash M : \u B \and \Gamma \vdash V : \texttt{Char}}
+%%   {\Gamma \vdash S[\print V; M] \equidyn \print V; S[M] : \u C}
+%% \end{mathpar}
+
 \subsection{Call-by-Push-Value, Gradualized}
 
 In figure \ref{judge} we present the syntax of Gradual Type Theory.
@@ -867,60 +1106,6 @@ and to axiomatize strong type soundness we include the $\beta$ and
 $\eta$ principles of CBPV as order equivalences, but otherwise
 unmodified.
 
-\paragraph{Stacks}
-
-The most unusual feature of call-by-push-value to someone used to
-call-by-value is the \emph{stack} judgment, which is the dual to the
-value judgment.
-%
-Semantically, we can think of open values as equivalent to those terms
-that are ``pure'' in that they never perform any effects.
-%
-On the other hand, we can think of stacks as equivalent to those terms
-between computation types that \emph{reflect all effects} of their
-input.
-%
-This is formalized with the following equivalence rules that say if a
-term is a stack filled with a term performing an effect, then that is
-equivalent to first performing the effect, and then running $M$ with
-the same stack.
-
-\begin{mathpar}
-  \inferrule
-  {\Gamma \pipe \u B \vdash S : \u C}
-  {\Gamma \vdash S[\err] \equidyn \err}
-
-  \inferrule
-  {\Gamma \pipe \u B \vdash S : \u C \and \Gamma \vdash M : \u B \and \Gamma \vdash V : \texttt{Char}}
-  {\Gamma \vdash S[\print V; M] \equidyn \print V; S[M] : \u C}
-\end{mathpar}
-
-While these rules are very intuitive when the input and output types
-of the stack are $\u F A$ for some value type (which is always the
-case in call-by-value), when we have stacks between computation types
-that are not $\u F A$, we can observe this property to be quite
-strong.
-
-For instance, consider a stack $A \to \u B \vdash S : \u F A$ between
-a function type and a $\u F$ree value type.
-%
-Consider what happens when we apply the stack to a function that
-prints.
-%
-In CBN and CBPV evaluation, $\print c; \lambda x:A. M$ is equivalent
-to $\lambda x:A.\print c; M$ because the printing only happens when
-the expression is called.
-
-\begin{align*}
-  S[\lambda x : A. \print c; M] &\equidyn S[\print c; \lambda x:A. M]\\
-  &\equidyn \print c; S[\lambda x:A. M] \\
-\end{align*}
-
-This shows us that a stack out of a function type \emph{calls the
-  function exactly once}.
-%
-Generalizing further, a stack out of an iterated function type $A_1
-\to A_2 \to \u B$ \emph{supplies all of the arguments}
 
 \subsection{Casts}
 
@@ -1014,31 +1199,6 @@ TODO: do we actually know what would go wrong in that case?
 
 \newpage
 
-\begin{figure}
-  \[
-  \begin{array}{lcl}
-    A & ::= & \dynv \mid U \u B \mid 0 \mid A_1 + A_2 \mid 1 \mid A_1 \times A_2 \\
-    \u B  & ::= & \dync \mid \u F A \mid \top \mid \u B_1 \with \u B_2 \mid A \to \u B\\
-    \Gamma & ::= & \cdot \mid \Gamma, x : A \\
-    \Delta  & ::= & \cdot \mid \bullet : \u B \\
-    \Phi  & ::= & \cdot \mid \Phi, x \ltdyn x': A \ltdyn A' \\
-    \Psi  & ::= & \cdot \mid \bullet \ltdyn \bullet : \u B \ltdyn \u B' \\
-    V  & ::= & x \mid \upcast A {A'} V \mid \upcast {\u B} {\u B'} V \mid \abort{V} 
-               \mid \inl{V} \mid \inr{V} \mid \caseofXthenYelseZ V {x_1. V_1}{x_2.V_2} \mid \\
-       & & () \mid \pmpairWtoinZ V V' \mid (V_1,V_2) \mid \pmpairWtoXYinZ V x y V' \mid \thunk{M}
-    \\
-    M & ::= & \bullet \mid \err_{\u B} \mid \letXbeYinZ V x M \mid \dncast A {A'} M \mid \dncast {\u B} {\u B'} M \mid \abort{V} \mid \\
-    & & \caseofXthenYelseZ V {x_1. M_1}{x_2.M_2} \mid \pmpairWtoinZ V M \mid \pmpairWtoXYinZ V x y M
-    \mid \force{V} \mid \\
-    & & \ret{V} \mid \bindXtoYinZ{M}{x}{N} \mid \lambda x:A.M \mid M\,V \mid \emptypair \mid \pair{M_1}{M_2} \mid \pi M \mid \pi' M
-    \\
-    T & ::= & A \mid \u B \\
-    E & ::= & V \mid M  \\
-  \end{array}
-  \]
-  \caption{GTT Syntax}
-\end{figure}
-
 \begin{figure}[t]
 \begin{small}
     
@@ -1557,6 +1717,7 @@ interpreted as value types in call-by-push-value.
 %% \end{figure}
 
 \section{Theorems in Gradual Type Theory}
+\label{sec:theorems-in-gtt}
 
 In this section, we derive equational and inequational properties of
 Gradual Type Theory.
@@ -1590,7 +1751,7 @@ between $\u F$ types and dually~\cite{levy16popl}.
 
 \smallskip
 
-\begin{lemma}[Intitial objects] ~ \label{lem:initial}
+\begin{lemma}[Initial objects] ~ \label{lem:initial}
   \begin{enumerate}
   \item For all (value or computation) types $T$, there exists a unique
     expression $x : 0 \vdash E : T$.
@@ -1951,7 +2112,7 @@ x : A \vdash \upcast{A}{\dynv}{x} \equidyn \upcast{A'}{\dynv}{\upcast{A}{A'}{x}}
 
 In Figure~\ref{fig:gtt-term-dyn-axioms}, we asserted the retract axiom
 for casts into the dynamic type.  More generally:
-\begin{theorem}{Retract Property for General Casts}
+\begin{theorem}[Retract Property for General Casts]
   \begin{enumerate}
   \item
     $\bullet : \u F A \vdash \bindXtoYinZ{\bullet}{x}{\dncast{\u F A}{\u F A'}{(\ret{(\upcast{A}{A'}{x})})}} \equidyn \bullet  : \u F A$
@@ -3445,7 +3606,7 @@ error approximation of a \emph{non-gradual} call-by-push-value
 language.
 %
 We call this the \emph{contract translation} because it translates the
-builtin casts of the gradual language into ordinary terms implemented
+built-in casts of the gradual language into ordinary terms implemented
 in a non-gradual language.
 %
 While contracts are typically implemented in a dynamically typed
@@ -3454,29 +3615,28 @@ that might be used in later stages of compilation.
 
 In particular the aim of the remaining sections of the paper is to
 establish the following theorems:
-\begin{theorem}{Equi-dynamism implies Observational Equivalence}
+\begin{theorem}[Equi-dynamism implies Observational Equivalence]
   If $\Gamma \vdash M_1 \equidyn M_2 : \u B$, then for any closing
   context $C : (\Gamma \vdash \u B) \Rightarrow (\cdot \vdash \u F
   2)$, then $C[M_1]$ and $C[M_2]$ have the same behavior: both
-  diverge, both run to an error or both run to true or both run to
-  false.
+  diverge, both run to an error, or both run to $\tru$ or both run to $\fls$.
 \end{theorem}
-\begin{theorem}{Graduality}
+\begin{theorem}[Graduality]
   If $\Gamma_1 \ltdyn \Gamma_2 \vdash M_1 \ltdyn M_2 : B_1 \ltdyn
   B_2$, then for any GTT context $C : (\Gamma_1 \vdash B_1)
   \Rightarrow (\cdot \vdash \u F 2)$, and any valid interpretation of
   the dynamic types, one of the following holds
   \begin{enumerate}
   \item $\sem{C[M_1]} \Downarrow \err$
-  \item $\sem{C[M_1]}, \sem{C[\dncast{B_1}{B_2}M_2[\upcast{\Gamma_1}{\Gamma_2}{\Gamma_1}]]} \Uparrow$
-  \item $\sem{C[M_1]},
-    \sem{C[\dncast{B_1}{B_2}M_2[\upcast{\Gamma_1}{\Gamma_2}{\Gamma_1}]]}
-    \Downarrow \ret V$ and $V = \tru$ or $V = \fls$.
+  \item $\sem{C[M_1]} \Uparrow$ and $\sem{C[\dncast{B_1}{B_2}M_2[\upcast{\Gamma_1}{\Gamma_2}{\Gamma_1}]]} \Uparrow$
+  \item $\sem{C[M_1]} \Downarrow \ret V$,~~
+    $\sem{C[\dncast{B_1}{B_2}M_2[\upcast{\Gamma_1}{\Gamma_2}{\Gamma_1}]]} 
+    \Downarrow \ret V$, and $V = \tru$ or $V = \fls$.
   \end{enumerate}
 \end{theorem}
 
 As a consequence we will also get the consistency of the type theory:
-\begin{corollary}{Consistency of GTT}
+\begin{corollary}[Consistency of GTT]
   The judgment $\vdash \ret \inl() \ltdyn \ret \inr() : \u F(1+1)$ is not
   provable in GTT.
 \end{corollary}
@@ -3506,18 +3666,18 @@ We break down this proof into 3 major steps.
   observational error approximation.
 \end{enumerate}
 
-By composing these, we get a model of GTT where equidynamism is sound
+By composing these, we get a model of GTT where equi-dynamism is sound
 for observational equivalence and an operational semantics that
 satisfies the graduality theorem.
 
 \subsection{Call-by-push-value}
 
-Next, we define the call-by-push-value language \cbpvstar that will
+Next, we define the call-by-push-value language \cbpvstar\ that will
 serve as a metalanguage that will be the target for our contract
 translations of GTT.
 %
-The star in \cbpvstar designates that this is the axiomatic version of
-call-by-push-value with complex values and stacks, whereas \cbpv will
+The star in \cbpvstar\ designates that this is the axiomatic version of
+call-by-push-value with complex values and stacks, whereas \cbpv\ will
 designate the operational version of call-by-push-value with only
 operational values and stacks, to be defined later.
 %
@@ -3530,10 +3690,13 @@ with more general types for \emph{recursive} value types and
 \emph{corecursive} computation types.
 %
 Adhering to CBPV's strict philosophy, the recursive type is a value
-type whose eliminator is by pattern matching, whereas the recursive
-type is defined by its eliminator (unroll).
+type whose eliminator is by pattern matching, whereas the corecursive
+type is a computation type defined by its eliminator (unroll).
 
 The inequational theory of CBPV includes the $\beta, \eta$ rules for 
+% AA: hanging sentence above. 
+% AA: We should explain what we mean by "+ ::=" and "- ::=" in the figure
+% below, and also say these are additions/deletions to the grammar in Fig 2.
 
 \begin{figure}[h]
   \begin{mathpar}
@@ -3605,7 +3768,7 @@ The inequational theory of CBPV includes the $\beta, \eta$ rules for
       ${\bullet : \nu \u Y. \u B \vdash \bullet \equidyn \roll\unroll \bullet : \nu \u Y. \u B}$\\
     \end{tabular}
   \end{small}
-  \caption{CBPV* types, terms, recursive types (diff from GTT)}
+  \caption{\cbpvstar\  types, terms, recursive types (diff from GTT)}
   \label{fig:cbpv-star}
 \end{figure}
 
@@ -3667,7 +3830,7 @@ types (where the projection models a downcast).
 While this formulation is very convenient in that both kinds of ep
 pairs are pairs of a value and a stack, the projection properties are
 often occur more naturally in the following forms:
-\begin{lemma}{Alternative Projection}
+\begin{lemma}[Alternative Projection]
   If $(V_e,S_p)$ is a value ep pair from $A$ to $A'$ and $\Gamma,
   y:A'\pipe\Delta \vdash M : \u B$, then
   \[ \Gamma , x' : A' \vdash \bindXtoYinZ {S_p[\ret x']} x M[V_e/y] \ltdyn M[x'/y] \]
@@ -3693,8 +3856,8 @@ often occur more naturally in the following forms:
   \end{align*}
 \end{longproof}
 \begin{definition}[Dynamic Type Interpretation]
-  A $\dynv,\dync$ interpretation $\rho$ consists of first a \cbpvtxt
-    value type $\rho(\dynv)$, a \cbpvtxt computation type $\rho(\dync)$.
+  A $\dynv,\dync$ interpretation $\rho$ consists of first a \cbpvtxt\ 
+    value type $\rho(\dynv)$, a \cbpvtxt\ computation type $\rho(\dync)$.
     This is extended in the obvious way to interpretations of all
     ground types $\srho G$ and $\srho {\u G}$.
 
@@ -3706,7 +3869,7 @@ often occur more naturally in the following forms:
 \end{definition}
 
 Next, we show several possible interpretations of the dynamic type
-that will all give by construction implementations that satisfy the
+that will all give, by construction, implementations that satisfy the
 gradual guarantee.
 
 We can motivate these definitions by considering the requirements for
@@ -3748,7 +3911,7 @@ $\dynv \cong G + \dynv_{-G}$.
   \end{longonly}
 \end{proof}
 
-So it is very natural to make the dynamic type the minimal type with
+Therefore, it is very natural to make the dynamic type the minimal type with
 injections from each $G$: i.e., just make it the sum of all value ground
 types: $\Sigma_{G} G$.
 
@@ -3836,10 +3999,10 @@ interpretations:
   \end{align*}
 \end{longproof}
 
-This dynamic type is a bit odd, and doesn't seem to reproduce any
+The above dynamic type is a bit odd, and doesn't seem to reproduce any
 existing dynamically typed language.
 %
-However it is seems like a perfectly natural dynamic type for CBPV:
+However, it seems like a perfectly natural dynamic type for CBPV:
 introduction forms for $\dynv$ are exactly the introduction forms for
 all of the value types (unit, pair,$\inl$, $\inr$, force), and
 elimination forms are all of the elimination forms for computation
@@ -3917,11 +4080,11 @@ encoded by making each case use a fresh constructor (using nominal
 techniques like opaque structs in Racket) and then making the sum the
 union of the constructors, as argued in (TODO: TH-Siek Wadlerfest).
 %
-Accurately modelling this nominal structure is out of scope for this
+Accurately modeling this nominal structure is out of scope for this
 work, but probably something like nominal type theory (TODO: cite)
 could be used.
 
-However in minimalist languages like simple dialects of Scheme and
+However, in minimalist languages, such as simple dialects of Scheme and
 Lisp, sum types are often encoded \emph{structurally} rather than
 nominally by using some fixed sum type of \emph{symbols}, also called
 \emph{atoms}.
@@ -4070,11 +4233,11 @@ if-then-else.
 Next, we add in the elimination form for $\dynv$ and the introduction
 form for $\dync$.
 %
-The elimination form for $\dynv$ is a typed version of scheme's
+The elimination form for $\dynv$ is a typed version of Scheme's
 \emph{match} macro.
 %
 Surprisingly, the introduction form for $\dync$ is very similar to a
-typed version of scheme's \emph{case-lambda} construct.
+typed version of Scheme's \emph{case-lambda} construct.
 %
 \begin{shortonly}
   In the extended version of the paper, we include the appropriate
@@ -4218,7 +4381,7 @@ typed version of scheme's \emph{case-lambda} construct.
 
   Finally, we note now that all of these axioms are satisfied when
   using the Scheme-like dynamic type interpretation and extending the
-  translation of GTT into CBPV* with the following, tediously
+  translation of GTT into \cbpvstar\  with the following, tediously
   explicit definition:
   \begin{align*}
     &\sem{\bool} = 1+1\\
@@ -4261,7 +4424,7 @@ value type is booleans $1+1$) we can restrict as follows:
 
 \subsection{Contract Translation}
 
-Next, we consider the translation of GTT into CBPV*.
+Next, we consider the translation of GTT into \cbpvstar.
 %
 For the remainder of the paper, we assume that we have a fixed dynamic
 type interpretation $\rho$, and all proofs and definitions will work
@@ -4418,6 +4581,7 @@ rules use the $\dynv, \dync$ are top rules and transitivity.
   {A \to \u B \ltdyn A' \to \u B'}
   \end{mathpar}
   \caption{Normalized Type Dynamism Relation}
+  \label{fig:normalized}
 \end{figure}
 
 \begin{lemma}[Normalized Type Dynamism is Equivalent to Original]
@@ -4491,17 +4655,17 @@ rules use the $\dynv, \dync$ are top rules and transitivity.
 Next, we extend the translation of casts to a translation of all terms.
 %
 Since all term constructions in GTT besides casts are constructions in
-CBPV*, we simply extend it by congruence.
+\cbpvstar, we simply extend it by congruence.
 %
 We note that the translation is clearly type preserving in the
 following sense:
 \begin{lemma}[Type Preservation of Contract Translation]
   If $\Gamma\pipe\Delta \vdash E : T$ in GTT, then $\sem{\Gamma}
-  \pipe\sem\Delta\vdash \sem E : \sem T$ in CBPV*.
+  \pipe\sem\Delta\vdash \sem E : \sem T$ in \cbpvstar.
 \end{lemma}
 
-In effect we have now given an interpretation of the types, terms and
-type dynamism proofs of GTT in CBPV*.
+In effect, we have now given an interpretation of the types, terms, and
+type dynamism proofs of GTT in \cbpvstar.
 %
 To complete this to form a \emph{model} of GTT, we need to given an
 interpretation of the \emph{term dynamism} proofs.
@@ -4511,8 +4675,8 @@ following ``axiomatic graduality'' theorem, that defines such an
 interpretation.
 %
 The basic idea is that GTT has \emph{heterogeneous} term dynamism
-rules indexed by type dynamism, but CBPV* has only \emph{homogeneous}
-inequalities between terms, i.e. if $E \ltdyn E'$, then $E,E'$ have
+rules indexed by type dynamism, but \cbpvstar\  has only \emph{homogeneous}
+inequalities between terms, i.e., if $E \ltdyn E'$, then $E,E'$ have
 the \emph{same} context and types.
 %
 Since every type dynamism judgment has an associated cast, we can
@@ -4573,7 +4737,7 @@ of bind and thunk.
 \begin{longonly}
   First, to keep proofs high-level, we establish the following cast
   reductions that follow easily from $\beta,\eta$ principles.
-\begin{lemma}{Cast Reductions}
+\begin{lemma}[Cast Reductions]
   The following are all provable
   \begin{align*}
     &\sem{\upcast{A_1+A_2}{A_1'+A_2'}}[\inl V] \equidyn \inl \sem{\upcast{A_1}{A_1'}}[V]\\
@@ -5393,7 +5557,7 @@ introduction/elimination forms, and are all simple calculations.
 \begin{longproof}
   By mutual induction over term dynamism derivations. For the $\beta,
   \eta$ and reflexivity rules, we use the identity expansion lemma and
-  the corresponding $\beta, \eta$ rule of CBPV* (TODO ref).
+  the corresponding $\beta, \eta$ rule of \cbpvstar\  (TODO ref).
 
   For compatibility rules a pattern emerges.  For universal rules
   (positive intro, negative elim) are easy, don't need to reason about
@@ -5832,18 +5996,18 @@ introduction/elimination forms, and are all simple calculations.
 \end{longproof}
 
 Finally, as a corollary, we have the following conservativity result
-that lets us view GTT as a kind of conservative extension of CBPV*.
+that lets us view GTT as a kind of conservative extension of \cbpvstar.
 %
 Speicifically, to prove this we need to show that the heterogeneous
-notion of term dynamism in GTT coincides with the CBPV* notion of term
+notion of term dynamism in GTT coincides with the \cbpvstar\  notion of term
 dynamism where they overlap.
 \begin{corollary}[Conservativity]
   If $\Gamma, \Delta \vdash E, E' : T$ are two terms of the same type
   in GTT, then $E \ltdyn E'$ is provable in GTT if and only if it is
-  provable in CBPV*.
+  provable in \cbpvstar.
 \end{corollary}
 \begin{proof}
-  The reverse direction holds because CBPV* is a syntactic subset of
+  The reverse direction holds because \cbpvstar\ is a syntactic subset of
   GTT. The forward direction holds by axiomatic graduality and the
   fact that identity casts are identities.
 \end{proof}
@@ -6167,7 +6331,7 @@ Dual to the treatment of values, stacks consist only of
 
 With this in mind, how is it possible that we even \emph{can}
 ``eliminate'' these forms: for closed values, we can ``evaluate away''
-the complexity and get a closed simpl value (if we don't use $U$), but
+the complexity and get a closed simple value (if we don't use $U$), but
 for open terms, evaluation will get ``stuck'' when we pattern match on
 a variable.
 %
@@ -6175,12 +6339,12 @@ The trick is that we do \emph{not} translate complex values to values
 and complex stacks to stacks.
 %
 Rather, we translate complex values $V : A$ to a term $\simp{V} : \u F
-A$ that in CBPV* is equivalent to $\ret V$, but doesn't use any
+A$ that in \cbpvstar\ is equivalent to $\ret V$, but doesn't use any
 complex values or stacks as subterms.
 %
 Similarly, we translate a complex stack $S$ with hole $\bullet : \u B$
 to a term $\simp{S}$ with a \emph{free variable} $z : U \u B$ such that
-in CBPV*, $\simp S \equidyn S[\force z]$.
+in \cbpvstar, $\simp S \equidyn S[\force z]$.
 %
 While this is type changing values and stacks, ultimately terms $M$
 will be translated to terms $\simp{M}$ with the same type.
@@ -6205,7 +6369,7 @@ fuhrmann, Guillaume MM).
 A thunkable term is a term of type $M : \u F A$ that acts like $\ret
 V$.
 %
-Intuitively this means $M$ should have no observable effects, and so
+Intuitively, this means $M$ should have no observable effects, and so
 can be freely duplicated or discarded like a value.
 %
 We can define this using the inequational theory of CBPV by saying
@@ -6299,8 +6463,8 @@ some of the proofs simpler.
   \end{mathpar}  
 \end{lemma}
 
-\begin{lemma}[De-complexification is Identity in CBPV*]
-  Considering CBPV as a subset of CBPV*, we can show
+\begin{lemma}[De-complexification is Identity in \cbpvstar]
+  Considering CBPV as a subset of \cbpvstar\, we can show
   \begin{enumerate}
   \item If $\Gamma \pipe \cdot \vdash M : \u B$  then $M \equidyn \simp M$.
   \item If $\Gamma \pipe \Delta \vdash S : \u B$ then $S[\force z] \equidyn \simp S$.
@@ -6818,7 +6982,7 @@ composition.
 \end{longproof}
 
 \begin{theorem}[Complex CBPV is Conservative over CBPV]
-  If $M, M'$ are terms in CBPV and $M \ltdyn M'$ is provable in CBPV*
+  If $M, M'$ are terms in CBPV and $M \ltdyn M'$ is provable in \cbpvstar\ 
   then $M \ltdyn M'$ is provable in CBPV.
 \end{theorem}
 \begin{longproof}
@@ -6830,7 +6994,7 @@ composition.
 
 \section{Operational Model of GTT}
 
-In this section we establish a model of our CBPV inequational theory
+In this section, we establish a model of our CBPV inequational theory
 using a notion of observational approximation based on the CBPV
 operational semantics.
 %
@@ -6887,8 +7051,8 @@ consider to be the type of whole programs.
   \label{fig:cbpv-operational-semantics}
 \end{figure}
 
-We can then observe the following standard operational properties. We
-write $M \step N$ with no index when the index is irrelevant.
+We can then observe the following standard operational properties. (We
+write $M \step N$ with no index when the index is irrelevant.)
 \begin{lemma}[Reduction is Deterministic]
   If $M \step M_1$ and $M \step M_2$, then $M_1 = M_2$.
 \end{lemma}
@@ -6907,7 +7071,7 @@ write $M \step N$ with no index when the index is irrelevant.
   \end{enumerate}
 \end{lemma}
 
-Next, though we use a small-step semantics, our definition of
+Though we use a small-step semantics, our definition of
 observational equivalence is defined with respect to the final result
 of the program.
 %
@@ -7007,7 +7171,7 @@ $\ltdyn$ that places $\err$ as the least element, and every other
 element as a maximal element.
 %
 Note that this is \emph{not} the standard notion of observational
-approximation, which we write $\preceq$, makes $\diverge$ a least
+approximation, which we write $\preceq$, which makes $\diverge$ a least
 element and every other element a maximal element.
 %
 To distinguish these, we call $\ltdyn$ \emph{error} approximation and
@@ -7017,6 +7181,7 @@ We present these graphically (with two more) in figure
 \ref{fig:result-orders}.
 
 The goal of this section is to prove
+% AA: Let's say what these properties are in English
 \begin{small}
 \begin{mathpar}
   \inferrule{\Gamma \pipe \Delta \vdash E \equidyn E' : T}{\Gamma \pipe \Delta \vDash E \ctxize= E' \in T}\and
@@ -7080,7 +7245,7 @@ $\ctxize\apreorder$ with the set of all of its \emph{finitary
   approximations} $\ix\apreorder i$ that ``time out'' after observing
 $i$ steps of evaluation.
 %
-When a time out occurs, the relation gives up and declares that the
+When a time-out occurs, the relation gives up and declares that the
 terms \emph{are} related.
 %
 This means that the original relation is only recoverable from the
@@ -7095,10 +7260,10 @@ We call such a preorder a \emph{divergence preorder}.
 \end{definition}
 
 But this presents a problem, because \emph{neither} of our intended
-relations ($=$ and $\ltdyn$) is a divergence preorder, rather in
-$\diverge$ have is a \emph{maximal} element.
+relations ($=$ and $\ltdyn$) is a divergence preorder, rather both have
+$\diverge$ as a \emph{maximal} element.
 %
-However there is a standard ``trick'' for subverting this obstacle in
+However, there is a standard ``trick'' for subverting this obstacle in
 the case of contextual equivalence (\cite{ahmedsomething}): we notice
 that we can define equivalence as the symmetrization of divergence
 approximation, i.e., $M \ctxize= N$ if and only if $M \ctxize\preceq
@@ -7135,6 +7300,7 @@ by the following two observations.
 \end{lemma}
 
 \subsection{CBPV Step Indexed Logical Relation}
+\label{sec:lr}
 
 Next, we turn to the problem of proving results of the form $E
 \ctxize\apreorder E'$ where $\apreorder$ is a divergence preorder.
@@ -7146,9 +7312,9 @@ that is much easier to use.
 Fortunately, we can apply standard logical relations techniques to
 provide an alternate definition \emph{inductively} on types.
 %
-However, since we have non-well founded type definitions using
-$\mu,\nu$, our logical relation will also be defined inductively on a
-\emph{step-index} that times out when we've exhausted our steps.
+However, since we have non-well-founded type definitions using
+$\mu$ and $\nu$, our logical relation will also be defined inductively on a
+\emph{step index} that times out when we've exhausted our step budget.
 %
 To bridge the gap between the indexed logical relation and the
 divergence preorder we care about, we define the ``finitization'' of a
@@ -7157,7 +7323,7 @@ divergence preorder to be a relation between \emph{programs} and
 at index $i$ if it reduces to $R$ in less than $i$ steps or it reduces
 at least $i$ times.
 
-\begin{definition}{Finitized Preorder}
+\begin{definition}[Finitized Preorder]
   Given a divergence preorder $\apreorder$, we define the
   \emph{finitization} of $\apreorder$ to be, for each natural number
   $i$, a relation between programs and results:
@@ -7183,10 +7349,11 @@ logical relation rather than by progress and preservation.
 However, the following properties of the indexed relation can easily
 be established.
 %
-First a kind of ``transitivity'' of the indexed relation with respect
-to the original preorder, that is key to proving transitivity of the
+First, a kind of ``transitivity'' of the indexed relation with respect
+to the original preorder, which is key to proving transitivity of the
 logical relation.
-\begin{lemma}{Indexed Relation is a Module of the Preorder}
+\begin{lemma}[Indexed Relation is a Module of the Preorder]
+\label{lem:module}
   If $M \ix\apreorder i R$ and $R \apreorder R'$ then $M \ix\apreorder i R'$
 \end{lemma}
 \begin{proof}
@@ -7197,8 +7364,8 @@ logical relation.
 
 Next, we show the relation is downward-closed, meaning it is
 \emph{easier} for the relation to be satisfied if the step-index is
-\emph{smaller} (because time outs occur earlier).
-\begin{lemma}{Downward Closure of Indexed Relation}
+\emph{smaller} (because time-outs occur earlier).
+\begin{lemma}[Downward Closure of Indexed Relation]
   If $M \ix\apreorder i R$ and $j\leq i$ then $M \ix \apreorder j R$.
 \end{lemma}
 \begin{longproof}
@@ -7210,17 +7377,17 @@ Next, we show the relation is downward-closed, meaning it is
 \end{longproof}
 
 We also get the following ``base case'' of our relation.
-\begin{lemma}{Triviality at $0$}
+\begin{lemma}[Triviality at $0$]
   For any $\cdot \vdash M : \u F 2$, $M \ix\apreorder 0 R$
 \end{lemma}
 \begin{proof}
   Because $M \bigstepsin{0} M$
 \end{proof}
 
-\begin{lemma}{Result Anti-reduction}
+\begin{lemma}[Result Anti-reduction]
   If $M \bigstepsin{i} N$ then $\result(M) = \result(N)$.
 \end{lemma}
-\begin{lemma}{Anti-reduction}
+\begin{lemma}[Anti-reduction]
   If $M \ix\apreorder i R$ and $N \bigstepsin{j} M$, then $N \ix\apreorder {{i+j}} R$
 \end{lemma}
 \begin{longproof}
@@ -7231,15 +7398,45 @@ We also get the following ``base case'' of our relation.
   \end{enumerate}
 \end{longproof}
 
+\begin{figure}
+  \begin{mathpar}
+    {\itylrof\apreorder{i}{A}} \subseteq \{ \cdot \vdash V : A \}^2
+    \qquad\qquad\qquad{\itylrof\apreorder{i}{\u B}}\subseteq \{ \cdot \pipe \u B \vdash S
+    : \u F (1 + 1) \}^2\\
+    \begin{array}{rcl}
+      \cdot \itylrof\apreorder i {\cdot} \cdot &=& \top\\
+      \gamma_1,V_1/x \itylrof\apreorder i {\Gamma,x:A} \gamma_2,V_2/x &=& \gamma_1 \itylrof\apreorder i \Gamma \gamma_2 \wedge V_1 \itylrof\apreorder i A V_2\\
+      V_1 \itylr i 0 V_2 &=& \bot\\
+      \inl V_1 \itylr i {A + A'} \inl V_2 &= & V_1 \itylr i A V_2\\
+      \inr V_1 \itylr i {A + A'} \inr V_2 &= & V_1 \itylr i {A'} V_2 \\
+      () \itylr i 1 () &=& \top\\
+      (V_1,V_1') \itylr i {A \times A'} (V_2, V_2') &=& V_1 \itylr i A V_2 \wedge V_1' \itylr i {A'} V_2'\\
+      % Should this be a roll or not?
+      \rollty {\mu X. A} V_1 \itylr i {\mu X. A} \rollty {\mu X. A} V_2 &=& i = 0 \vee V_1 \itylr {i-1} {A[\mu X.A/X]} V_2\\
+      V_1 \itylr i {U \u B} V_2 &=& \forall j \leq i, S_1 \itylr j {\u B} S_2.~ S_1[\force V_1] \ix\apreorder j \result(S_2[\force V_2]) \\\\
+
+      S_1[\bullet V_1] \itylr i {A \to \u B} S_1[\bullet V_2] & = & V_1 \itylr i A V_2 \wedge S_1 \itylr {i}{\u B} S_2\\
+      S_1[\pi_1 \bullet] \itylr i {\u B \wedge \u B'} S_2[\pi_1 \bullet] &=& S_1 \itylr i {\u B} S_2\\
+      S_1[\pi_2 \bullet] \itylr i {\u B \wedge \u B'} S_2[\pi_2 \bullet] &=& S_1 \itylr i {\u B'} S_2\\
+      S_1 \itylr i {\top} S_2 &=& \bot\\
+      S_1[\unroll \bullet] \itylr i {\nu \u Y. \u B} S_2[\unroll \bullet] &=& i = 0 \vee S_1 \itylr {i-1} {\u B[\nu \u Y. \u B/\u Y]} S_2\\
+      S_1 \itylr i {\u F A} S_2 & = & \forall j\leq i, V_1 \itylr j A V_2.~ S_1[\ret V_1] \ix\apreorder j \result(S_2[\ret V_2])
+    \end{array}
+  \end{mathpar}
+  \caption{Logical Relation from a Preorder $\apreorder$}
+  \label{fig:lr}
+\end{figure}
+
+
 Next, we define the \emph{logical} preorder by induction on types and
-the index $i$ in figure \ref{lr}.
+the index $i$ in figure \ref{fig:lr}.
 %
 Specifically, for every $i$ and value type $A$ we define a relation
 $\itylrof \apreorder i A$ between closed values of type $A$ because
-these are the only ones that will be pattern matched against at
+these are the only ones that will be pattern-matched against at
 runtime.
 %
-The relation is defined in a type-directed fashion, the intuition is
+The relation is defined in a type-directed fashion, the intuition being
 that we relate two positive values when they are built up in the same
 way: i.e., they have the same introduction form and their subterms are
 related.
@@ -7270,8 +7467,8 @@ downward-closed.
 This is known as the \emph{orthogonal} of the relation, and one
 advantage of the CBPV language is that it makes the use of
 orthogonality \emph{explicit} in the type structure, analogous to the
-benefits of using Nakano's \emph{later} modality for step indexing
-(which we ironically do not do) \cite{nakano}.
+benefits of using Nakano's \emph{later} modality \cite{nakano} for step indexing
+(which we ironically do not do).
 
 Next, we define when two \emph{stacks} are related.
 %
@@ -7305,33 +7502,6 @@ We extend the definition to contexts to \emph{closing substitutions}
 pointwise: two closing substitutions for $\Gamma$ are related at $i$
 if they are related at $i$ for each $x:A \in \Gamma$.
 
-\begin{figure}
-  \begin{mathpar}
-    {\itylrof\apreorder{i}{A}} \subseteq \{ \cdot \vdash V : A \}^2
-    \qquad\qquad\qquad{\itylrof\apreorder{i}{\u B}}\subseteq \{ \cdot \pipe \u B \vdash S
-    : \u F (1 + 1) \}^2\\
-    \begin{array}{rcl}
-      \cdot \itylrof\apreorder i {\cdot} \cdot &=& \top\\
-      \gamma_1,V_1/x \itylrof\apreorder i {\Gamma,x:A} \gamma_2,V_2/x &=& \gamma_1 \itylrof\apreorder i \Gamma \gamma_2 \wedge V_1 \itylrof\apreorder i A V_2\\
-      V_1 \itylr i 0 V_2 &=& \bot\\
-      \inl V_1 \itylr i {A + A'} \inl V_2 &= & V_1 \itylr i A V_2\\
-      \inr V_1 \itylr i {A + A'} \inr V_2 &= & V_1 \itylr i {A'} V_2 \\
-      () \itylr i 1 () &=& \top\\
-      (V_1,V_1') \itylr i {A \times A'} (V_2, V_2') &=& V_1 \itylr i A V_2 \wedge V_1' \itylr i {A'} V_2'\\
-      % Should this be a roll or not?
-      \rollty {\mu X. A} V_1 \itylr i {\mu X. A} \rollty {\mu X. A} V_2 &=& i = 0 \vee V_1 \itylr {i-1} {A[\mu X.A/X]} V_2\\
-      V_1 \itylr i {U \u B} V_2 &=& \forall j \leq i, S_1 \itylr j {\u B} S_2.~ S_1[\force V_1] \ix\apreorder j \result(S_2[\force V_2]) \\\\
-
-      S_1[\bullet V_1] \itylr i {A \to \u B} S_1[\bullet V_2] & = & V_1 \itylr i A V_2 \wedge S_1 \itylr {i}{\u B} S_2\\
-      S_1[\pi_1 \bullet] \itylr i {\u B \wedge \u B'} S_2[\pi_1 \bullet] &=& S_1 \itylr i {\u B} S_2\\
-      S_1[\pi_2 \bullet] \itylr i {\u B \wedge \u B'} S_2[\pi_2 \bullet] &=& S_1 \itylr i {\u B'} S_2\\
-      S_1 \itylr i {\top} S_2 &=& \bot\\
-      S_1[\unroll \bullet] \itylr i {\nu \u Y. \u B} S_2[\unroll \bullet] &=& i = 0 \vee S_1 \itylr {i-1} {\u B[\nu \u Y. \u B/\u Y]} S_2\\
-      S_1 \itylr i {\u F A} S_2 & = & \forall j\leq i, V_1 \itylr j A V_2.~ S_1[\ret V_1] \ix\apreorder j \result(S_2[\ret V_2])
-    \end{array}
-  \end{mathpar}
-  \caption{Logical Relation from a Preorder $\apreorder$}
-\end{figure}
 
 Next, analogous to the contextual preorder, we define the
 \emph{logical} preorder on terms, values and stacks.
@@ -7341,7 +7511,7 @@ observation by application of program contexts, the logical preorder
 defines observation in terms of the ``input-output'' behavior: given
 related inputs, the terms must give related observations under related
 stacks.
-\begin{definition}{Logical Preorder}
+\begin{definition}[Logical Preorder]
   Given a preorder on results $\apreorder$ with $\diverge$ a least
   element, we define the step-indexed logical preorder as follows:
   \begin{enumerate}
@@ -7363,7 +7533,7 @@ relation, i.e., the fundamental lemma of the logical relation.
 %
 This requires the easy lemma, that the relation on closed terms and
 stacks is downward closed.
-\begin{lemma}{Logical Relation Downward Closure}
+\begin{lemma}[Logical Relation Downward Closure]
   \begin{enumerate}
   \item If $V_1 \itylrof\apreorder i A V_2$ and $j\leq i$ then $V_1
     \itylrof\apreorder j A V_2$
@@ -7371,9 +7541,9 @@ stacks is downward closed.
     \itylrof\apreorder j {\u B} S_2$
   \end{enumerate}
 \end{lemma}
-\begin{theorem}{Logical Preorder is a Congruence}
-  For any preorder on results with diverge least element, the logical
-  preorder $E \ilrof\apreorder i E'$ is a congruence relation, i.e., that
+\begin{theorem}[Logical Preorder is a Congruence]
+  For any preorder on results with $\diverge$ a least element, the logical
+  preorder $E \ilrof\apreorder i E'$ is a congruence relation, i.e., 
   it is closed under applying any value/term/stack constructors to
   both sides.
 \end{theorem}
@@ -7590,7 +7760,7 @@ stacks is downward closed.
 \end{longproof}
 
 As a direct consequence we get the reflexivity of the relation.
-\begin{corollary}{Reflexivity}
+\begin{corollary}[Reflexivity]
   For any $\Gamma \vdash M : \u B$, and $i \in \mathbb{N}$,
   \[\Gamma \vDash M \ilrof\apreorder i  M \in \u B.\]
 \end{corollary}
@@ -7601,7 +7771,7 @@ counts unrolling steps.
 %
 This shows for instance that terms that never use $\mu$ or $\nu$ types
 are guaranteed to terminate.
-\begin{corollary}{Unary LR}
+\begin{corollary}[Unary LR]
   For every program $\cdot \vdash M : \u F 2$ and $i \in \mathbb{N}$,
   \[ M \ix\apreorder i \result(M) \]
 \end{corollary}
@@ -7612,6 +7782,7 @@ are guaranteed to terminate.
 \end{longproof}
 
 Next, we can show that $\result(M)$ is the 
+% AA: Incomplete sentence above.
 
 Using the reflexivity proof, we can now prove that the indexed
 relation between terms and results recovers the original preorder in
@@ -7620,7 +7791,7 @@ the limit as $i \to \omega$.
 We write $\ix\apreorder \omega$ to mean the relation holds for every
 $i \in \mathbb N$, i.e., $\ix\apreorder\omega =
 \bigcap_{i\in\mathbb{N}} \ix\apreorder i$.
-\begin{corollary}[{In the limit, Finitized Preorder Recovers Original}]
+\begin{corollary}[In the limit, Finitized Preorder Recovers Original]
   \label{lem:limit}
   For any divergence preorder $\apreorder$,
   \[ \result(M) \apreorder R \iff \forall i \in \mathbb{N}.~ M \ix\apreorder i R \]
@@ -7647,7 +7818,7 @@ $i \in \mathbb N$, i.e., $\ix\apreorder\omega =
   \end{enumerate}
 \end{longproof}
 
-\begin{corollary}[{LR is Sound wrt Contextual Preorder}]
+\begin{corollary}[LR is Sound wrt Contextual Preorder]
   If $\Gamma \vDash E \ilrof\apreorder \omega E' \in \u B$
   then
   $\Gamma \vDash E \ctxize\apreorder E' \in \u B$.
@@ -7664,7 +7835,7 @@ $i \in \mathbb N$, i.e., $\ix\apreorder\omega =
 In fact, we can prove the converse, that at least for the term case,
 the logical preorder is \emph{complete} with respect to the contextual
 preorder.
-\begin{lemma}[{Logical Preorder is Complete wrt Contextual Preorder}]
+\begin{lemma}[Logical Preorder is Complete wrt Contextual Preorder]
   For any $\apreorder$, if $\Gamma \vDash M \ctxize \apreorder N \in
   \u B$, then $\Gamma \vDash M \ilrof\apreorder \omega N \in \u B$.
 \end{lemma}
@@ -7720,7 +7891,7 @@ all of our logical relations (open and closed) are transitive in the
 limit. To do this, we first prove the following kind of
 ``quantitative'' transitivity lemma, and then transitivity in the
 limit is a consequence.
-\begin{lemma}{Logical Relation is Quantitatively Transitive}
+\begin{lemma}[Logical Relation is Quantitatively Transitive]\hfill
   \begin{enumerate}
   \item If $V_1 \itylr i A V_2$ and $V_2 \itylr
     \omega A V_3$, then $V_1 \itylr i A V_3$
@@ -7757,7 +7928,7 @@ limit is a consequence.
   \end{enumerate}
 \end{longproof}
 
-\begin{lemma}{Logical Relation is Quantitatively Transitive (Open Terms)}
+\begin{lemma}[Logical Relation is Quantitatively Transitive (Open Terms)]\hfill
   \begin{enumerate}
   \item If $\gamma_1 \itylr i \Gamma \gamma_2$ and $\gamma_2 \itylr
     \omega \Gamma \gamma_3$, then $\gamma_1 \itylr i \Gamma \gamma_3$
@@ -7793,7 +7964,7 @@ limit is a consequence.
   \end{enumerate}
 \end{longproof}
 
-\begin{corollary}{Logical Relation is Transitive in the Limit}
+\begin{corollary}[Logical Relation is Transitive in the Limit]\hfill
   \begin{enumerate}
   \item If $\Gamma \vDash M_1 \ilrof\apreorder \omega M_2 \in \u B$ and
     $\Gamma \vDash M_2 \ilrof\apreorder \omega M_3 \in \u B$, then
@@ -7809,8 +7980,8 @@ limit is a consequence.
 
 Next, we verify the $\beta, \eta$ equivalences hold as orderings each
 way.
-\begin{lemma}{$\beta, \eta$ Laws are valid}
-  For any preorder with diverge a least element, the $\beta, \eta$
+\begin{lemma}[$\beta, \eta$ Laws are valid]
+  For any preorder with $\diverge$ a least element, the $\beta, \eta$
   laws are valid for $\ilrof\apreorder \omega$
 \end{lemma}
 \begin{longproof}
@@ -8040,7 +8211,7 @@ way.
   \end{enumerate}
 \end{longproof}
 
-\begin{lemma}{Substitution Principles}
+\begin{lemma}[Substitution Principles]
   For any diverge-bottom preorder $\apreorder$, the following are
   valid
   \begin{enumerate}
@@ -8066,7 +8237,7 @@ Finally, we verify the axioms about errors.
 The strictness axioms hold for any $\apreorder$, but the axiom that
 $\err$ is a least element hold specifically in $\precltdyn, \ltdyn\succeq$
 
-\begin{lemma}[{Validity of Error Rules}]
+\begin{lemma}[Validity of Error Rules]
   For any divergence-least preorder $\apreorder$,
   \begin{mathpar}
     \Gamma \vDash S[\err] \ilr i \err \in \u B \and
@@ -8093,7 +8264,7 @@ $\err$ is a least element hold specifically in $\precltdyn, \ltdyn\succeq$
 
 Finally, the lemmas we have shown cover all of the cases of the
 following theorem that says our logical relation is a model of CBPV.
-\begin{lemma}{$\precltdyn$ and $\ltdynsucc$ are Models of CBPV}
+\begin{lemma}[$\precltdyn$ and $\ltdynsucc$ are Models of CBPV]
   If $\Gamma \pipe \Delta \vdash E \ltdyn E' : \u B$ then both of the
   following hold:
   \begin{mathpar}
@@ -8105,7 +8276,7 @@ following theorem that says our logical relation is a model of CBPV.
 As shown in section (TODO: the observational approximation section),
 with the soundness theorem, this gives that $\ctxize\ltdyn$ is a model
 as well.
-\begin{theorem}{$\ctxize \ltdyn$ is a Model of CBPV $\ltdyn$}
+\begin{theorem}[$\ctxize \ltdyn$ is a Model of CBPV $\ltdyn$]
   \[
   \inferrule
   {\Gamma \pipe \Delta \vdash E \ltdyn E' : T}
@@ -8119,7 +8290,7 @@ as well.
 
 Which shows also that contextual equivalence is a model of
 equi-dynamism.
-\begin{corollary}{$\ctxize =$ is a Model of CBPV $\equidyn$}
+\begin{corollary}[$\ctxize =$ is a Model of CBPV $\equidyn$]
   \[
   \inferrule
   {\Gamma \pipe \Delta \vdash E \equidyn E' : T}
@@ -8127,11 +8298,11 @@ equi-dynamism.
   \]
 \end{corollary}
 
-\section{Discussion}
+\section{Discussion and Related Work}
 
 \paragraph{Casts as Galois Connections}
 
-\paragraph{Thunkable, Linear, Contextual Isomorphisms}
+\paragraph{Thunkable, Linear, and Contextual Isomorphisms}
 
 Our use of complex values for upcasts and complex stacks for downcasts
 seems closely related to the use of thunkable isomorphisms and linear
@@ -8152,7 +8323,7 @@ relates to our system.
 
 \paragraph{Gradual Typing Frameworks}
 
-Our approach to systemactically producing a gradual type theory from a
+Our approach to systematically producing a gradual type theory from a
 type theory is similar in spirit to ``frameworks'' for gradual typing,
 specifically AGT \cite{AGT} and the Gradualizer \cite{gradualizer}.
 %
@@ -8172,8 +8343,9 @@ the axiomatic semantics of the typed language.
 This means our approach accomplishes something quite different: their
 frameworks produce specific implementations, whereas our approach is
 about finding commonalities across different implementations.
+% AA: Perhaps the above point should also be made in the intro. 
 
-Furthermore, while both of their approaches prove various gradual
+Furthermore, while both the AGT and Gradualizer approaches prove various gradual
 typing correctness theorems by construction (gradual type safety,
 conservativity, gradual guarantee), neither considers the program
 equivalences that we argued are the raison d'\^etre of gradual typing
@@ -8184,18 +8356,20 @@ In fact, the systems both \emph{fail} to preserve these equivalences.
 First, AGT reproduces the ``eager'' semantics of function casts,
 breaking the $\eta$ equality of functions.
 %
-Gradualizer on the other hand produces the ``lazy'' semantics and thus
+Gradualizer, on the other hand, produces the ``lazy'' semantics and thus
 should validate the $\eta$ equality, though this does not seem to be a
-central component of the language: a different version of gradualizer
-could produce the eager semantics and satisfy all of their criteria.
+central component of the language: a different version of Gradualizer
+could produce the ``eager'' semantics and satisfy all of their criteria.
 %
-Both systems on the other hand, fail to preserve the program
+Both systems, however, fail to preserve the program
 equivalences of parametric polymorphism, and in addition fail to meet
 gradual typing criteria for stateful references.
 %
 While we have not considered these features, we have reason to believe
 that our approach will be more successful: specifically, the
-``reason'' that all of these occur is that they are not safety
+``reason'' that all of these occur 
+% AA: what occur? what is "all of these"?
+is that they are not safety
 properties of the operational semantics, and the operational semantics
 is the only input to these frameworks.
 %
@@ -8231,10 +8405,11 @@ Since casts must be minimal in the refinement ordering, the only
 observable effects for casts should be dynamic type errors, this means
 that $\upcast{\u F A}{\u F A'}\ret V \mapsto \err$, and we must decide
 whether the positive party or negative party is at fault.
+% AA: above sentence is hard to follow.  Too long and a bit jargon-y.
 %
-However since this is call-by-value evaluation, this error happens
+However, since this is call-by-value evaluation, this error happens
 unconditionally on the continuation, so the continuation never had a
-chance to behave in such a way to prevent blame, so we must blame the
+chance to behave in such a way as to prevent blame, so we must blame the
 positive party.
 
 Dually, if a downcast of computation types were not a priori linear,
@@ -8242,7 +8417,7 @@ it would be of the form $\dncast{U \u B}{U \u B'}$ and if it were
 truly not linear, that would mean it forces its $U \u B'$ input either
 never or more than once.
 %
-Since downcasts should be refine their inputs, it is not possible for
+Since downcasts should refine their inputs, it is not possible for
 the downcast to use the argument twice, since printing twice does not
 refine printing once.
 %
@@ -8349,7 +8524,7 @@ However, their formulation is quite different in that sessions are
 considered to be a \emph{subset} of value types whereas in ours there
 are explicit coercions between value and computation types.
 %
-Furthermore their language is not \emph{polarized} in the same way as
+Furthermore, their language is not \emph{polarized} in the same way as
 CBPV, so there is not likely an analogue between our upcasts being
 always between value types and downcasts being always between
 computation types in their system.
@@ -8364,7 +8539,7 @@ session types.
 Our interpretation of the dynamic types in CBPV suggests a design for
 a Scheme-like language with a value and computation distinction.
 %
-This may be of interest for designing an extension of Type Racket that
+This may be of interest for designing an extension of Typed Racket that
 efficiently supports CBN or a scheme-like language with codata types.
 %
 While the definition of the dynamic computation type by a lazy product
@@ -8373,8 +8548,8 @@ dual, the sum type, in the definition of the dynamic value type.
 %
 That is, in a truly dynamically typed language, we would not think of
 the dynamic type as being built out of some sum type construction, but
-rather that it is the \emph{union} of all of the ground value types,
-and the union happens to be a \emph{disjoint} union and so we can
+rather that it is the \emph{union} of all of the ground value types, and the
+union happens to be a \emph{disjoint} union and so we can 
 model it as a sum type.
 %
 In the dual, we don't think of the computation dynamic type as a
@@ -8389,6 +8564,8 @@ invoked with any finite number of arguments on the stack, a fairly
 accurate model of implementations of Scheme that pass multiple
 arguments on the stack.
 
+\bibliography{max}
+
 \appendix
 
 \section{Program Equivalences and Inequivalences in Previous Work}
diff --git a/paper/max.bib b/paper/max.bib
new file mode 100644
index 0000000000000000000000000000000000000000..f70e5d4218848590882de70b5adb40de0ab42905
--- /dev/null
+++ b/paper/max.bib
@@ -0,0 +1,977 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%                       String Definitions                         %%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% Publishing houses
+% %%%%%%%%%%%%%%%%%
+@STRING{ap =        "Academic Press" }
+@STRING{springer =  {Springer-Verlag} }
+@STRING{lncs =      "Lecture Notes in Computer Science" }
+@STRING{lnm =       "Lecture Notes in Mathematics" }
+@STRING{gtm =       "Graduate Texts in Mathematics" }
+@STRING{lfcs =      "Laboratory for Foundations of Computer Science,
+                    University of Edinburgh" }
+@STRING{mp =        "MIT Press" }
+@STRING{mitpress =  mp }
+%
+% Journals and magazines
+% %%%%%%%%%%%%%%%%%%%%%%
+@STRING{toplas =    "ACM Transactions on Programming Languages and Systems" }
+@STRING{jlp =       {Journal of Logic Programming} }
+@STRING{cacm =      "Communications of the {ACM}" }
+@STRING{jacm =      "Journal of the {ACM}" }
+@STRING{tcs =       "Theoretical Computer Science" }
+@STRING{jsl =       "Journal of Symbolic Logic" }
+@STRING{siamjc =    "SIAM Journal on Computing" }
+@STRING{mscs =      "Mathematical Structures in Computer Science" }
+@STRING{proc =      "Proceedings of the" }
+@STRING{ic =        "Information and Computation" }
+@STRING{iandcomp =  {Information and Computation} }
+@string{ifip =      "International Federation for Information Processing World Computer Congress (IFIP)"}
+@STRING{jfp =       "Journal of Functional Programming" }
+@STRING{lmcs =      "Logical Methods in Computer Science" }
+@string{hosc =      "Higher-Order and Symbolic Computation"}
+@STRING{cup =       "Cambridge University Press" }
+@STRING{mcgh =      "McGraw-Hill" }
+@STRING{nh =        "North Holland" }
+@STRING{sv =        "Springer-Verlag" }
+@STRING{aw =        "Addison-Wesley" }
+@STRING{ph =        "Prentice Hall" }
+@STRING{signot =    "SIGPLAN Notices"}
+@STRING{taoop =     "Carl A. Gunter and John C. Mitchell, editors, {\em
+                    Theoretical Aspects of Object-Oriented Programming:
+                    Types, Semantics, and Language Design}, MIT Press, 1994" }
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%                             Conferences                                %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+@STRING{popl =      "{ACM} {S}ymposium on {P}rinciples of {P}rogramming
+                     {L}anguages ({POPL})" }
+@STRING{popl73 =    popl # ", Boston, Massachusetts" }
+@STRING{popl75 =    popl # ", Palo Alto, California" }
+@STRING{popl76 =    popl # ", {A}tlanta, {G}eorgia" }
+@STRING{popl77 =    popl # ", Los Angeles, California" }
+@STRING{popl78 =    popl # ", Tucson, Arizona" }
+@STRING{popl79 =    popl # ", San Antonio, Texas" }
+@STRING{popl80 =    popl # ", Las Vegas, Nevada" }
+@STRING{popl81 =    popl # ", Williamsburg, Virginia" }
+@STRING{popl82 =    popl # ", Albuquerque, New Mexico" }
+@STRING{popl83 =    popl # ", Austin, Texas" }
+@STRING{popl84 =    popl # ", Salt Lake City, Utah" }
+@STRING{popl85 =    popl # ", New Orleans, Louisiana" }
+@STRING{popl86 =    popl # ", St.\ Petersburg Beach, Florida" }
+@STRING{popl87 =    popl # ", Munich, Germany" }
+@STRING{popl88 =    popl # ", San Diego, California" }
+@STRING{popl89 =    popl # ", Austin, Texas" }
+@STRING{popl90 =    popl # ", {S}an {F}rancisco, {C}alifornia" }
+@STRING{popl91 =    popl # ", Orlando, Florida" }
+@STRING{popl92 =    popl # ", Albuquerque, New Mexico" }
+@STRING{popl93 =    popl # ", Charleston, South Carolina" }
+@STRING{popl94 =    popl # ", {P}ortland, {O}regon" }
+@STRING{popl95 =    popl # ", San Francisco, California" }
+@STRING{popl96 =    popl # ", St.~Petersburg Beach, Florida" }
+@STRING{popl97 =    popl # ", Paris, France" }
+@STRING{popl98 =    popl # ", San Diego, California" }
+@STRING{popl99 =    popl # ", San Antonio, Texas" }
+@STRING{popl00 =    popl # ", Boston, Massachusetts" }
+@STRING{popl01 =    popl # ", London, England" }
+@STRING{popl02 =    popl # ", Portland, Oregon" }
+@STRING{popl03 =    popl # ", New Orleans, Louisiana" }
+@STRING{popl04 =    popl # ", Venice, Italy" }
+@STRING{popl05 =    popl # ", Long Beach, California" }
+@STRING{popl06 =    popl # ", Charleston, South Carolina" }
+@STRING{popl07 =    popl # ", Nice, France" }
+@STRING{popl08 =    popl # ", San Francisco, California" }
+@STRING{popl09 =    popl # ", Savannah, Georgia" }
+@STRING{popl10 =    popl # ", Madrid, Spain" }
+@STRING{popl11 =    popl # ", Austin, Texas" }
+@STRING{popl12 =    popl # ", Philadelphia, Pennsylvania" }
+@STRING{popl13 =    popl # ", Rome, Italy" }
+@STRING{popl14 =    popl # ", San Diego, California" }
+@STRING{popl15 =    popl # ", Mumbai, India" }
+@STRING{popl16 =    popl # ", St. Petersburg, Florida" }
+% ----
+@STRING{icfp =      "{I}nternational {C}onference on {F}unctional {P}rogramming
+                    ({ICFP})" }
+@STRING{icfp96 =    icfp # ", Philadelphia, Pennsylvania" }
+@STRING{icfp97 =    icfp # ", Amsterdam, The Netherlands" }
+@STRING{icfp98 =    icfp # ", Baltimore, Maryland, USA" }
+@STRING{icfp99 =    icfp # ", Paris, France" }
+@STRING{icfp00 =    icfp # ", Montreal, Canada" }
+@STRING{icfp01 =    icfp # ", Firenze, Italy" }
+@STRING{icfp02 =    icfp # ", Pittsburgh, Pennsylvania" }
+@STRING{icfp03 =    icfp # ", Uppsala, Sweden" }
+@STRING{icfp04 =    icfp # ", Snowbird, Utah" }
+@STRING{icfp05 =    icfp # ", Tallinn, Estonia" }
+@STRING{icfp06 =    icfp # ", Portand, Oregon" }
+@STRING{icfp07 =    icfp # ", Freiburg, Germany" }
+@STRING{icfp08 =    icfp # ", Victoria, British Columbia, Canada" }
+@STRING{icfp09 =    icfp # ", Edinburgh, Scotland" }
+@STRING{icfp10 =    icfp # ", Baltimore, Maryland" }
+@STRING{icfp11 =    icfp # ", Tokyo, Japan" }
+@STRING{icfp12 =    icfp # ", Copenhagen, Denmark" }
+@STRING{icfp13 =    icfp # ", Boston, Massachusetts" }
+@STRING{icfp14 =    icfp # ", Gothenburg, Sweden" }
+@STRING{icfp15 =    icfp # ", Vancouver, British Columbia, Canada" }
+@STRING{icfp16 =    icfp # ", Nara, Japan" }
+@STRING{icfp17 =    icfp # ", Oxford, United Kingdom" }
+
+% ----
+@STRING{oopsla =    "{ACM} {S}ymposium on {O}bject {O}riented {P}rogramming:
+                    {S}ystems, {L}anguages, and {A}pplications ({OOPSLA})" }
+@STRING{oopsla86 =  oopsla # ", Portland, Oregon" }
+@STRING{oopsla89 =  oopsla }
+@STRING{oopsla98 =  oopsla # ", Vancouver, British Columbia" }
+@STRING{oopsla08 =  oopsla # ", Nashville, Tennessee" }
+% ----
+@STRING{lics =      "IEEE Symposium on Logic in Computer Science (LICS)" }
+@STRING{lics91 =    lics # ", Amsterdam, The Netherlands" }
+@STRING{lics92 =    lics # ", Santa Cruz, California" }
+@STRING{lics93 =    lics # ", Montreal, Canada" }
+@STRING{lics94 =    lics # ", Paris, France" }
+@STRING{lics95 =    lics # ", San Diego, California" }
+@STRING{lics96 =    lics # ", New Brunswick, New Jersey" }
+@STRING{lics97 =    lics # ", Warsaw, Poland" }}
+@STRING{lics98 =    lics # ", Indianapolis, Indiana" }
+@STRING{lics99 =    lics # ", Trento, Italy" }
+@STRING{lics00 =    lics # ", Santa Barbara, California" }
+@STRING{lics01 =    lics # ", Boston, Massachusetts" }
+@STRING{lics02 =    lics # ", Copenhagen, Denmark" }
+@STRING{lics03 =    lics # ", Ottawa, Canada" }
+@STRING{lics04 =    lics # ", Turku, Finland" }
+@STRING{lics05 =    lics # ", Chicago, Illinois" }
+@STRING{lics06 =    lics # ", Seattle, Washington" }
+@STRING{lics07 =    lics # ", Wroclaw, Poland" }
+@STRING{lics08 =    lics # ", Pittsburgh, Pennsylvania" }
+@STRING{lics09 =    lics # ", Los Angeles, California" }
+@STRING{lics10 =    lics # ", Edinburgh, Scotland" }
+@STRING{lics11 =    lics # ", Toronto, Canada" }
+@STRING{lics12 =    lics # ", Dubrovnik, Croatia" }
+@STRING{lics13 =    lics # ", New Orleans, Louisiana" }
+@STRING{lics14 =    lics # ", Vienna, Austria" }
+% ----
+@STRING{pldi =      "{ACM SIGPLAN Conference on Programming Language Design
+                    and Implementation (PLDI)}" }
+@STRING{pldi88 =    pldi # ", {A}tlanta, {G}eorgia" }
+@STRING{pldi89 =    pldi # ", Portland, Oregon" }
+@STRING{pldi90 =    pldi # ", White Plains, New York" }
+@STRING{pldi91 =    pldi # ", Toronto, Ontario" }
+@STRING{pldi92 =    pldi # ", San Francisco, California" }
+@STRING{pldi93 =    pldi # ", Albuquerque, New Mexico" }
+@STRING{pldi94 =    pldi # ", Orlando, Florida" }
+@STRING{pldi95 =    pldi # ", La Jolla, California" }
+@STRING{pldi96 =    pldi # ", Philadephia, Pennsylvania" }
+@STRING{pldi97 =    pldi # ", Las Vegas, Nevada" }
+@STRING{pldi98 =    pldi # ", {M}ontreal, {C}anada" }
+@STRING{pldi99 =    pldi # ", {A}tlanta, {G}eorgia" }
+@STRING{pldi00 =    pldi # ", Vancouver, British Columbia, Canada" }
+@STRING{pldi01 =    pldi # ", Snowbird, Utah" }
+@STRING{pldi02 =    pldi # ", Berlin, Germany" }
+@STRING{pldi03 =    pldi # ", San Diego, California" }
+@STRING{pldi04 =    pldi # ", Washington, DC" }
+@STRING{pldi05 =    pldi # ", Chicago, Illinois" }
+@STRING{pldi06 =    pldi # ", Ottawa, Canada" }
+@STRING{pldi07 =    pldi # ", San Diego, California" }
+@STRING{pldi08 =    pldi # ", Tucson, Arizona" }
+@STRING{pldi09 =    pldi # ", Dublin, Ireland" }
+@STRING{pldi10 =    pldi # ", Toronto, Canada" }
+@STRING{pldi11 =    pldi # ", San Jose, California" }
+@STRING{pldi12 =    pldi # ", Beijing, China" }
+@STRING{pldi13 =    pldi # ", Seattle, Washington" }
+@STRING{pldi14 =    pldi # ", Edinburgh, Scotland" }
+% ----
+@STRING{lfp =       "ACM Symposium on Lisp and Functional Programming (LFP)" }
+@STRING{lfp84 =     lfp # ", Austin, Texas" }
+@STRING{lfp88 =     lfp # ", Snowbird, Utah" }
+@STRING{lfp90 =     lfp }
+@STRING{lfp92 =     lfp }
+% ----
+@STRING{fpca =      "ACM Symposium on Functional Programming Languages and
+                    Computer Architecture (FPCA)" }
+@STRING{fpca89 =    fpca }
+@STRING{fpca93 =    fpca }
+% ----
+@STRING{sosp=" {ACM} {S}ymposium on {O}perating {S}ystems {P}rinciples ({SOSP})"}
+% ----
+@STRING{fool =      "Workshop on Foundations of Object-Oriented Languages
+                    (FOOL), informal proceedings" }
+@STRING{tlca =      "Typed Lambda Calculi and Applications (TLCA)" }
+@STRING{tlca93 =    tlca # ", Utrecht, The Netherlands" }
+@STRING{tlca95 =    tlca # ", Edinburgh, UK" }
+@STRING{tlca97 =    tlca # ", Nancy, France" }
+@STRING{tlca99 =    tlca # ", L'Aquila, Italy" }
+@STRING{tlca01 =    tlca # ", Krakow, Poland" }
+@STRING{tlca03 =    tlca # ", Valencia, Spain" }
+@STRING{tlca05 =    tlca # ", Nara, Japan" }
+@STRING{tlca07 =    tlca # ", Paris, France" }
+@STRING{tlca09 =    tlca # ", Brasilia, Brazil" }
+@STRING{tlca11 =    tlca # ", Novi Sad, Serbia" }
+@STRING{tlca13 =    tlca # ", Eindhoven, The Netherlands" }
+
+@string{cav =       "Conference on Computer Aided Verification (CAV)"}
+@string{ccs =       "ACM Conference on Computer and Communication Security (CCS)"}
+@string{oakland =   "{IEEE} Symposium on Security and Privacy"}
+@string{dls =       "Dynamic Languages Symposium (DLS)"}
+@STRING{esop =      "European Symposium on Programming (ESOP)" }
+@STRING{esop92 =    esop # ", Rennes, France" }
+@STRING{ecoop =     "{E}uropean {C}onference on {O}bject-{O}riented
+                    {P}rogramming ({ECOOP})" }
+@STRING{ecoop97 =   ecoop }
+@STRING{ecoop02 =   ecoop # ", Malaga, Spain" }
+@STRING{flops =     "International Symposium on Functional and Logic Programming (FLOPS)" }
+@STRING{flops02 =   flops # ", Aizu, Japan" }
+@string{icalp =     "International Colloquium on Automata, Languages and Programming (ICALP)"}
+@string{plpv =      "Programming Languages meets Program Verification (PLPV)"}
+@STRING{post =      "Principles of Security and Trust (POST)" }
+@string{ppdp =      "ACM Conference on Principles and Practice of Declarative Programming (PPDP)"}
+@string{scheme =    "Scheme and Functional Programming Workshop (Scheme)"}
+@string{stop =      "Workshop on Script-to-Program Evolution (STOP)"}
+@string{tldi =      "ACM SIGPLAN Workshop on Types in Language Design and Implementation (TLDI)"}
+@STRING{tacs =      "Theoretical Aspects of Computer Software (TACS)" }
+@STRING{tacs01 =    tacs # ", Sendai, Japan" }
+@string{tfp =       "Trends in Functional Programming (TFP)"}
+@STRING{webdb =     {International Workshop on the Web and Databases
+                    (WebDB)} }
+@STRING{langsec = "Language-theoretic Security IEEE Security and Privacy Workshop (LangSec)"}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+@incollection{scott71,
+  title={Continuous lattices},
+  author={Scott, Dana},
+  booktitle={Toposes, algebraic geometry and logic},
+  pages={97--136},
+  year={1972},
+}
+
+@article{scott76,
+  title={Data types as lattices},
+  author={Scott, Dana},
+  journal={Siam Journal on computing},
+  volume={5},
+  number={3},
+  pages={522--587},
+  year={1976},
+  publisher={SIAM}
+}
+
+@inproceedings{wadler-findler09,
+ author = "Philip Wadler and Robert Bruce Findler",
+ title = "Well-typed programs can't be blamed",
+ booktitle = esop,
+ year = 2009,
+ month = mar,
+ pages = "1--16",
+ location = "York, UK"
+}
+
+@inproceedings{tobin-hochstadt06,
+ author = "Sam Tobin-Hochstadt and Matthias Felleisen",
+ title = "Interlanguage Migration: From Scripts to Programs",
+ booktitle = dls,
+ month = oct,
+ pages = "964--974",
+ year = 2006,
+}
+
+@inproceedings{tobin-hochstadt08,
+ author = {Sam Tobin-Hochstadt and Matthias Felleisen},
+ title = {The Design and Implementation of Typed Scheme},
+ booktitle = popl08,
+ year = {2008}}
+
+@inproceedings{siek-taha06,
+ author = "Jeremy G. Siek and Walid Taha",
+ title = "Gradual Typing for Functional Languages",
+ booktitle = scheme,
+ month = sep,
+ pages = "81--92",
+ year = 2006
+}
+
+@inproceedings{scott80,
+  author="Dana S. Scott",
+  booktitle="To H.B. Curry: Essays on Combinatory Logic, Lambda Calculus and Formalism",
+  editor="J.P. Seldin and J.R. Hindley",
+  title="Relating Theories of the Lambda-Calculus",
+  year=1980,
+  pages="403--450"}
+
+@inproceedings{findler-felleisen02,
+author = {Robert Bruce Findler and Matthias Felleisen},
+title = {Contracts for higher-order functions},
+booktitle = icfp,
+year = {2002},
+month = sep,
+pages = {48--59}
+}
+
+@inproceedings{findler-blume06,
+ author = "Robby Findler and Matthias Blume",
+ title = "Contracts as Pairs of Projections",
+ booktitle = flops,
+ month = apr,
+ year = 2006,
+}
+@inproceedings{siek-wadler10,
+  author    = {Jeremy G. Siek and Philip Wadler},
+  title     = {Threesomes, with and without blame},
+  booktitle = popl,
+  pages     = {365--376},
+  year      = {2010},
+}
+
+@inproceedings{longley03,
+  title={Universal types and what they are good for},
+  author={Longley, John R},
+  booktitle={Domain theory, logic and computation},
+  pages={25--63},
+  year={2003},
+}
+@inproceedings{dimoulas12,
+title = "Complete Monitors for Behavioral Contracts",
+author = "Christos Dimoulas and Sam Tobin-Hochstadt and Matthias Felleisen",
+booktitle = esop,
+year = 2012,
+month = mar}
+@inproceedings{mz2015functors,
+ author = {Melli\`{e}s, Paul-Andr{\'e} and Zeilberger, Noam},
+ title = {Functors Are Type Refinement Systems},
+ booktitle = {Proceedings of the 42Nd Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
+ series = {POPL '15},
+ year = {2015},
+ isbn = {978-1-4503-3300-9},
+ location = {Mumbai, India},
+ pages = {3--16},
+ numpages = {14},
+ url = {http://doi.acm.org/10.1145/2676726.2676970},
+ doi = {10.1145/2676726.2676970},
+ acmid = {2676970},
+ publisher = {ACM},
+ address = {New York, NY, USA},
+ keywords = {category theory, refinement types, type theory},
+} 
+@article{mz2015isbellpreprint,
+  author    = {Paul{-}Andr{\'{e}} Melli{\`{e}}s and
+               Noam Zeilberger},
+  title     = {Isbell Duality for Refinement Types},
+  journal   = {CoRR},
+  volume    = {abs/1501.05115},
+  year      = {2015},
+  url       = {http://arxiv.org/abs/1501.05115},
+  timestamp = {Mon, 02 Feb 2015 14:12:25 +0100},
+  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/MelliesZ15},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+@inproceedings{mz2016bifibrational,
+  author    = {Paul{-}Andr{\'{e}} Melli{\`{e}}s and
+               Noam Zeilberger},
+  title     = {A bifibrational reconstruction of Lawvere's presheaf hyperdoctrine},
+  booktitle = {Proceedings of the 31st Annual {ACM/IEEE} Symposium on Logic in Computer
+               Science, {LICS} '16, New York, NY, USA, July 5-8, 2016},
+  pages     = {555--564},
+  year      = {2016},
+  crossref  = {DBLP:conf/lics/2016},
+  url       = {http://doi.acm.org/10.1145/2933575.2934525},
+  doi       = {10.1145/2933575.2934525},
+  timestamp = {Thu, 27 Oct 2016 11:27:54 +0200},
+  biburl    = {http://dblp.uni-trier.de/rec/bib/conf/lics/MelliesZ16},
+  bibsource = {dblp computer science bibliography, http://dblp.org}
+}
+
+
+@article{shulman2008framed,
+  title={Framed bicategories and monoidal fibrations},
+  author={Shulman, Michael},
+  journal={Theory and Applications of Categories},
+  volume={20},
+  number={18},
+  pages={650--738},
+  year={2008}
+}
+
+@article{courser2016bicategory,
+  title={A bicategory of decorated cospans},
+  author={Courser, Kenny},
+  journal={arXiv preprint arXiv:1605.08100},
+  year={2016}
+}
+
+@article{garner2006,
+author = {Garner, Richard},
+journal = {Cahiers de Topologie et Géométrie Différentielle Catégoriques},
+number = 4,
+pages = {261-317},
+title = {Double clubs},
+volume = 47,
+year = 2006,
+}
+
+@misc{shulman2009lax,
+  title = {The Problem with Lax Functors},
+  author = {Shulman, Mike},
+  howpublished = {\url{https://golem.ph.utexas.edu/category/2009/12/the_problem_with_lax_functors.html}},
+  note = {Accessed: 2016-11-22},
+}
+
+@misc{harley,
+  title = {The Combination of Dynamic and Static Typing from a Categorical Perspective},
+  author = {Harley Eades III and Michael Townsend},
+  howpublished = {\url{http://metatheorem.org/includes/pubs/Grady-Draft.pdf}},
+  year={2017},
+  note = {Accessed: 2017-7-7},
+}
+
+
+@article{hayashi85,
+  author    = {Susumu Hayashi},
+  title     = {Adjunction of Semifunctors: Categorical Structures in Nonextensional
+               Lambda Calculus},
+  journal   = {Theor. Comput. Sci.},
+  volume    = {41},
+  pages     = {95--104},
+  year      = {1985},
+}
+
+@article{felleisen90,
+  title={On the expressive power of programming languages},
+  author={Felleisen, Matthias},
+  journal={ESOP'90},
+  year={1990},
+}
+@article{benton05:embedded,
+  title={Embedded Interpreters},
+  author={Nick Benton},
+  journal=jfp,
+  volume={15},
+  number={04},
+  pages={503--542},
+  year={2005},
+  publisher={Cambridge Univ Press}
+}
+@article{henglein94:dynamic-typing,
+ author="Fritz Henglein",
+ title="Dynamic Typing: Syntax and Proof Theory",
+ journal=scp,
+ volume="22",
+ number="3",
+ pages="197--230",
+ year="1994"
+}
+@article{favonia17,
+ title={Correctness of compiling polymorphism to dynamic typing},
+ volume={27},
+ journal={Journal of Functional Programming},
+ author={Keun-Bang Hou (Favonia) and Nick Benton and Robert Harper},
+ year={2017}
+}
+@inproceedings{AGT,
+ author = {Garcia, Ronald and Clark, Alison M. and Tanter, \'{E}ric},
+ title = {Abstracting Gradual Typing},
+ booktitle = popl,
+ year={2016}
+}
+@article{pitts96,
+title = "Relational Properties of Domains",
+journal = "Information and Computation",
+volume = "127",
+number = "2",
+pages = "66 - 90",
+year = "1996",
+author = "Andrew M. Pitts",
+}
+
+@article{smythplotkin,
+  title={The category-theoretic solution of recursive domain equations},
+  author={Smyth, Michael B and Plotkin, Gordon D},
+  journal={SIAM Journal on Computing},
+  volume={11},
+  number={4},
+  year={1982},
+}
+
+@inproceedings{refined,
+ author={Jeremy Siek and Micahel Vitousek and Matteo Cimini and John Tang Boyland},
+ title={Refined Criteria for Gradual Typing},
+ booktitle = "1st Summit on Advances in Programming Languages",
+ series={SNAPL 2015},
+ year={2015},
+}
+@inproceedings{gradualizer16,
+ author = {Cimini, Matteo and Siek, Jeremy G.},
+ title = {The Gradualizer: A Methodology and Algorithm for Generating Gradual Type Systems},
+ booktitle = {Proceedings of the 43rd Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
+ series = {POPL '16},
+ year={2016}                 
+}
+@inproceedings{gradualizer17,
+ author = {Cimini, Matteo and Siek, Jeremy G.},
+ title = {Automatically Generating the Dynamic Semantics of Gradually Typed Languages},
+ booktitle = {Proceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming Languages},
+ series = {POPL 2017},
+ year = {2017},
+ pages = {789--803},
+}
+
+@inproceedings{gradeffects2014,
+ author = {Ba\~{n}ados Schwerter, Felipe and Garcia, Ronald and Tanter, \'{E}ric},
+ title = {A Theory of Gradual Effect Systems},
+ booktitle = {Proceedings of the 19th ACM SIGPLAN International Conference on Functional Programming},
+ series = {ICFP '14},
+ location = {Gothenburg, Sweden},
+ pages = {283--295},
+ year={2014}                  
+} 
+
+@inproceedings{disney2011gradual,
+  title={Gradual information flow typing},
+  author={Disney, Tim and Flanagan, Cormac},
+  booktitle={International Workshop on Scripts to Programs 2011},
+  year={2011}
+}
+
+@inproceedings{ahmed17,
+author = {Amal Ahmed and Dustin Jamner and Jeremy G. Siek and Philip Wadler},
+title = {Theorems for Free for Free: Parametricity, With and Without Types},
+booktitle = icfp,
+year = {2017},
+}
+@inproceedings{igarashipoly17,
+author = {Yuu Igarashi and Taro Sekiyama and Atsushi Igarashi},
+title = {On Polymorphic Gradual Typing},
+booktitle = icfp17,
+year = {2017},
+}
+@inproceedings{igarashisession17,
+author = {Atsushi Igarashi and Peter Thiemann and Vasco Vasconcelos and Philip Wadler},
+title = {Gradual Session Types},
+booktitle = icfp,
+year = {2017},
+}
+@Inbook{freyd91,
+author="Freyd, Peter",
+editor="Carboni, Aurelio and Pedicchio, Maria Cristina and Rosolini, Guiseppe",
+title="Algebraically complete categories",
+bookTitle="Category Theory: Proceedings of the International Conference held in Como, Italy, July 22--28, 1990",
+year="1991",
+pages="95--104",
+}
+@inproceedings{siek-taha07,
+ author = "Jeremy G. Siek and Walid Taha",
+ title = "Gradual Typing for Objects",
+ booktitle = ecoop,
+ year = 2007,
+}
+@article{plotkin1993logic,
+  title={A logic for parametric polymorphism},
+  author={Plotkin, Gordon and Abadi, Mart{\'\i}n},
+  journal={Typed Lambda Calculi and Applications},
+  pages={361--375},
+  year={1993},
+}
+@inproceedings{lehmann17,
+ author = {Lehmann, Nico and Tanter, \'{E}ric},
+ title = {Gradual Refinement Types},
+ booktitle = popl,
+ year={2017},
+} 
+@article{felleisen-hieb,
+    author = "Matthias Felleisen and Robert Hieb",
+    title = "A Revised Report on the Syntactic Theories of Sequential Control and State",
+    journal = "Theor. Comput. Sci.",
+    volume = "103",
+    number = "2",
+    pages = "235--271",
+    year = "1992",
+}
+@article{kock95,
+title = "Monads for which structures are adjoint to units",
+journal = "Journal of Pure and Applied Algebra",
+volume = "104",
+number = "1",
+pages = "41 - 59",
+year = "1995",
+author = "Anders Kock",
+}
+
+@inproceedings{dagand16,
+ author = {Dagand, Pierre-Evariste and Tabareau, Nicolas and Tanter, \'{E}ric},
+ title = {Partial Type Equivalences for Verified Dependent Interoperability},
+ booktitle = icfp,
+ year = {2016},
+ location = {Nara, Japan},
+}
+
+@book{Lambek:1986,
+ author = {Lambek, J. and Scott, P. J.},
+ title = {Introduction to Higher Order Categorical Logic},
+ year = {1986},
+ isbn = {0-521-24665-2},
+ publisher = {Cambridge University Press},
+} 
+
+@article{crutwell-shulman,
+Author = {G. S. H. Cruttwell and Michael A. Shulman},
+Title = {A unified framework for generalized multicategories},
+Year = {2009},
+Eprint = {arXiv:0907.2460},
+journal = {Theory and Applications of Categories},
+volume = {24},
+number = {21}
+}
+
+@article{wand79ocat,
+title = "Fixed-point constructions in order-enriched categories",
+journal = "Theoretical Computer Science",
+volume = "8",
+number = "1",
+pages = "13 - 30",
+year = "1979",
+note = "",
+author = "Mitchell Wand"
+}
+
+@phdthesis{Dunphy:2002,
+ author = {Dunphy, Brian Patrick},
+ advisor = {Reddy, Uday},
+ title = {Parametricity As a Notion of Uniformity in Reflexive Graphs},
+ year = {2002},
+ isbn = {0-493-89955-3},
+ publisher = {University of Illinois at Urbana-Champaign},
+ address = {Champaign, IL, USA},
+}
+
+@article{ohearn95,
+author = "O'Hearn, Peter W. and Tennent, Robert D.",
+title = "Parametricity and Local Variables",
+journal = jacm,
+volume = "42",
+number = "3",
+pages = "658--709",
+month = "May",
+year = "1995"}
+
+@InProceedings{johann15bifibrational,
+  title =       {Bifibrational Functorial Semantics for Parametric Polymorphism},
+  author =        {Neil Ghani and Patricia Johann and Fredrik Nordvall Forsberg and Federico Orsanigo and Tim Revell},
+  booktitle = { Proceedings of Mathematical Foundations of Program Semantics},
+  year = {2015}
+}
+
+@book{constable+86nuprl-book,
+  author =    "Robert L. Constable and Stuart F. Allen and H. M. Bromley and 
+               W. R. Cleaveland and J. F. Cremer and R. W. Harper and Douglas J. Howe and 
+               T. B. Knoblock and N. P. Mendler and P. Panangaden and James T. Sasaki and
+               Scott F. Smith",
+  title="Implementing Mathematics with the {NuPRL} Proof Development System",
+  publisher={Prentice Hall},
+  year=1986
+}
+
+@unpublished{dagand:hal-01629909,
+  TITLE = {{Foundations of Dependent Interoperability}},
+  AUTHOR = {Dagand, Pierre-Evariste and Tabareau, Nicolas and Tanter, {\'E}ric},
+  URL = {https://hal.inria.fr/hal-01629909},
+  NOTE = {working paper or preprint},
+  YEAR = {2017},
+}
+
+@inproceedings{statonlevy13,
+ author = {Staton, Sam and Levy, Paul Blain},
+ title = {Universal Properties of Impure Programming Languages},
+ booktitle = popl,
+ year = 2013
+}
+
+@inproceedings{seely1984locally,
+  title={Locally cartesian closed categories and type theory},
+  author={Seely, Robert AG},
+  booktitle={Mathematical proceedings of the Cambridge philosophical society},
+  volume={95},
+  number={1},
+  pages={33--48},
+  year={1984},
+  organization={Cambridge University Press}
+}
+
+@inproceedings{hofmann1994interpretation,
+  title={On the interpretation of type theory in locally cartesian closed categories},
+  author={Hofmann, Martin},
+  booktitle={International Workshop on Computer Science Logic},
+  pages={427--441},
+  year={1994},
+}
+
+@article{clairambault2014biequivalence,
+  title={The biequivalence of locally cartesian closed categories and Martin-L{\"o}f type theories},
+  author={Clairambault, Pierre and Dybjer, Peter},
+  journal={Mathematical Structures in Computer Science},
+  volume={24},
+  number={6},
+  year={2014},
+  publisher={Cambridge University Press}
+}
+
+@article{Vakarthesis,
+  author    = {Matthijs V{\'{a}}k{\'{a}}r},
+  title     = {In Search of Effectful Dependent Types},
+  journal   = {CoRR},
+  year      = {2017},
+  url       = {http://arxiv.org/abs/1706.07997},
+}
+
+@inbook{ahmanghaniplotkin16,
+  title     = "Dependent Types and Fibred Computational Effects",
+  author    = "Daniel Ahman and Neil Ghani and Plotkin, {Gordon D.}",
+  year      = "2016",
+  doi       = "10.1007/978-3-662-49630-5_3",
+  booktitle = "Foundations of Software Science and Computation Structures",
+}
+@inproceedings{ahmed08:paramseal,
+title = "Parametric polymorphism through run-time sealing, or, Theorems for low, low prices!",
+author = "Jacob Matthews and Amal Ahmed",
+booktitle = esop,
+year = 2008,
+month = mar,
+location = "Budapest, Hungary",
+}
+@phdthesis{levy01:phd,
+author = "Levy, Paul Blain",
+title = "Call-by-Push-Value",
+type = "{Ph.~D.} Dissertation",
+school = "Queen Mary, University of London",
+department = "Department of Computer Science",
+address = "London, UK",
+month = mar,
+year = "2001"}
+
+@inproceedings{neis09,
+ author = {Georg Neis and Derek Dreyer and Andreas Rossberg},
+ title = {Non-Parametric Parametricity},
+ booktitle = icfp,
+ pages = "135--148",
+ year = {2009},
+ month = sep
+}
+
+@Article{moggi91,
+  author =       {Eugenio Moggi},
+  title =        {Notions of computation and monads},
+  journal =      {Inform. And Computation},
+  year =         {1991},
+  volume =    {93},
+  number =    {1},
+}
+ @inproceedings{Wolff:2011:GT,
+ author = {Wolff, Roger and Garcia, Ronald and Tanter, \'{E}ric and Aldrich, Jonathan},
+ title = {Gradual Typestate},
+ booktitle = {Proceedings of the 25th European Conference on Object-oriented Programming},
+ series = {ECOOP'11},
+ year = {2011},
+}
+
+@article{extended,
+  author    = {Max S. New and
+               Daniel R. Licata},
+  title     = {Call-by-name Gradual Type Theory},
+  journal   = {CoRR},
+  year      = {2018},
+  url       = {http://arxiv.org/abs/1802.00061},
+}
+
+@article{newlicata2018-fscd,
+  author    = {Max S. New and
+               Daniel R. Licata},
+  title     = {Call-by-name Gradual Type Theory},
+  journal   = {FSCD},
+  year      = {2018},
+}
+
+@article{martinlof83sienna,
+	year = {1983/1996},
+	journal = {Nordic Journal of Philosophical Logic},
+	publisher = {Scandinavian University Press},
+	volume = {1},
+	author = {Per Martin{-}L\"of},
+	number = {1},
+	title = {On the Meanings of the Logical Constants and the Justifications of the Logical Laws (Sienna Lectures)},
+	pages = {11--60}
+}
+
+@Article{pfenningdavies,
+    author = {Frank Pfenning and Rowan Davies},
+    title = {A Judgmental Reconstruction of Modal Logic},
+    journal = {Mathematical Structures in Computer Science},
+    year = {2001},
+    volume = {11}, 
+    pages = {511-540},
+}
+
+@article{dagand18,
+  title={Foundations of dependent interoperability},
+  volume={28},
+  DOI={10.1017/S0956796818000011},
+  journal={Journal of Functional Programming},
+  publisher={Cambridge University Press},
+  author={Dagand, Pierr-\'{E}variste and Tabareau, Nicolas and Tanter, \'{E}ric},
+  year={2018},
+  pages={e9}
+}
+  
+@inproceedings{thatte90,
+ author = "Satish Thatte",
+ title = "Quasi-static typing",
+ booktitle = popl,
+ pages = "367--381",
+ year = 1990
+}
+
+@misc{hejlsberg12,
+	Author = {Anders Hejlsberg},
+	Date-Added = {2014-01-06 15:39:32 +0000},
+	Date-Modified = {2014-09-11 10:40:22 +0000},
+	Howpublished = {Microsoft Channel 9 Blog},
+	Title = {Introducing {TypeScript}},
+	Year = {2012}}
+
+@incollection{bierman14,
+	Author = {Bierman, Gavin and Abadi, Mart{\'\i}n and Torgersen, Mads},
+	Booktitle = {ECOOP 2014 -- Object-Oriented Programming},
+	Date-Added = {2014-10-06 15:02:06 +0000},
+	Date-Modified = {2015-11-06 03:05:46 +0000},
+	Editor = {Jones, Richard},
+	Language = {English},
+	Pages = {257-281},
+	Publisher = {Springer Berlin Heidelberg},
+	Series = {Lecture Notes in Computer Science},
+	Title = {Understanding {TypeScript}},
+	Volume = {8586},
+	Year = {2014}}
+
+@inproceedings{verlaguet13,
+	Author = {Julien Verlaguet},
+	Booktitle = {Commercial Users of Functional Programming (CUFP)},
+	Date-Added = {2014-01-06 15:28:27 +0000},
+	Date-Modified = {2014-04-14 11:23:23 +0000},
+	Title = {Facebook: Analyzing {PHP} statically},
+	Year = {2013}}
+
+@url{chaudhuri14,
+	Author = {Avik Chaudhuri},
+	Date-Added = {2015-07-16 16:32:06 +0000},
+	Date-Modified = {2015-11-18 04:29:52 +0000},
+	Keywords = {gradual typing},
+	Title = {Flow: a static type checker for JavaScript},
+	Url = {http://flowtype.org/},
+	Urldate = {2014},
+	Year = {2014},
+	Bdsk-Url-1 = {http://flowtype.org/}}
+
+@manual{bracha11,
+	Author = {Gilad Bracha},
+	Date-Added = {2014-04-14 10:26:35 +0000},
+	Date-Modified = {2014-04-14 10:27:35 +0000},
+	Month = {October},
+	Organization = {Google},
+	Title = {Optional Types in {Dart}},
+	Year = {2011}}
+
+@misc{hejlsberg10,
+	Author = {Anders Hejlsberg},
+	Date-Added = {2014-01-06 15:43:48 +0000},
+	Date-Modified = {2014-01-06 16:09:03 +0000},
+	Howpublished = {Microsoft Channel 9 Blog},
+	Month = {April},
+	Title = {C\# 4.0 and beyond by Anders Hejlsberg},
+	Year = {2010}}
+
+@Misc{turner14:typescript,
+  author = 	 {Jonathan Turner},
+  title = 	 {TypeScript and the Road to 2.0},
+  Url = {https://blogs.msdn.microsoft.com/typescript/2014/10/22/typescript-and-the-road-to-2-0/},
+  month = 	 {October},
+  year = 	 2014}
+
+@inproceedings{siek15:mono,
+ author = {Siek, Jeremy G. and Vitousek, Michael M. and Cimini, Matteo and Tobin-Hochstadt, Sam and Garcia, Ronald},
+ title = {Monotonic References for Efficient Gradual Typing},
+ booktitle = {Proceedings of the 24th European Symposium on Programming on Programming Languages and Systems - Volume 9032},
+ year = {2015},
+}
+
+@misc{hack14:blog,
+ author = {Julien Verlaguet and Alek Menghrajani},
+ title  = {Hack: a new programming language for HHVM},
+ url = {https://code.facebook.com/posts/264544830379293/hack-a-new-programming-language-for-hhvm/},
+ month = {March},
+ year  = {2014}
+}
+
+@inproceedings{gronski06,
+ author = "Jessica Gronski and Kenneth Knowles and Aaron Tomb and Stephen N. Freund and Cormac Flanagan",
+ title = "Sage: Hybrid Checking for Flexible Specifications",
+ booktitle = scheme,
+ month = sep,
+ pages = "93--104",
+ year = 2006
+}
+
+@inproceedings{Ina:2011zr,
+	Author = {Ina, Lintaro and Igarashi, Atsushi},
+	Booktitle = {Proceedings of the 2011 ACM international conference on Object oriented programming systems languages and applications},
+	Date-Added = {2012-11-09 23:27:38 +0000},
+	Date-Modified = {2012-12-20 08:58:22 -0700},
+	Keywords = {dynamic types, generics, gradual typing},
+	Series = {OOPSLA '11},
+	Title = {Gradual typing for generics},
+	Year = {2011}}
+
+@inproceedings{swamy14,
+author    = {Nikhil Swamy and C{\'e}dric Fournet and Aseem Rastogi and
+  Karthikeyan Bhargavan and Juan Chen and Pierre-Yves Strub and Gavin M. Bierman},
+title     = {Gradual typing embedded securely in {J}ava{S}cript},
+booktitle = popl14,
+year      = {2014},
+pages     = {425-438}
+}
+
+@article{Allende:2013aa,
+	Author = {Esteban Allende and Oscar Calla{\'u} and Johan Fabry and {\'E}ric Tanter and Marcus Denker},
+	Date-Added = {2014-03-29 03:55:17 +0000},
+	Date-Modified = {2014-03-29 03:55:20 +0000},
+	Journal = {Science of Computer Programming},
+	Month = aug,
+	Note = {Available online},
+	Publisher = {Elsevier},
+	Title = {Gradual Typing for {Smalltalk}},
+	Urldoi = {http://dx.doi.org/10.1016/j.scico.2013.06.006},
+	Urlpdf = {http://pleiad.dcc.uchile.cl/papers/2013/allendeAl-scp2013.pdf},
+	Users = {eallende , ocallau , jfabry , etanter},
+	Year = 2013}
+
+@inproceedings{ahmed06:lr,
+title = "Step-Indexed Syntactic Logical Relations for Recursive and Quantified Types",
+author = "Amal Ahmed",
+booktitle = esop,
+year = 2006,
+month = mar,
+location = "Vienna, Austria",
+pages = "69--83"}
+
+@techreport{ahmed06:esop06-techrpt,
+author = "Amal Ahmed",
+title = "Step-Indexed Syntactic Logical Relations for Recursive and Quantified Types",
+institution = "Harvard University",
+number = "TR-01-06",
+month = jan,
+year = 2006,
+note="Available at \texttt{http://www.cs.indiana.edu/\linebreak[0]$\sim$amal/\linebreak[0]papers/\linebreak[0]lr-recquant-techrpt.pdf}"}
+
+@article{fuhrmann1999direct,
+  title={Direct models of the computational lambda-calculus},
+  author={F{\"u}hrmann, Carsten},
+  journal={Electronic Notes in Theoretical Computer Science},
+  volume={20},
+  pages={245--292},
+  year={1999},
+}