DG scalar advection on moving mesh (ALE)

I am trying to implement a DG-ALE solver for the following simple scalar advection problem: \frac{\partial c}{\partial t} +\mathbf{u}\cdot\nabla c = 0

The weak form is given by \int_K\frac{\partial c}{\partial t}vdx +\int_{\partial K}\mathbf{u}\cdot\nabla cvdx = 0. Integrating by parts and assuming incompressibility (\nabla \cdot u =0):

\int_K\frac{\partial c}{\partial t}vdx +\int_{\partial K}c\mathbf{u}\cdot \mathbf{n}vds - \int_K c\mathbf{u} \cdot \nabla v dx=0

With the choice of a numerical flux \hat{uc}, the second term can be written as: \int_F (\hat{\mathbf{u}c}\cdot \mathbf{n})^+jump(v)ds since \mathbf{n}^+ = \mathbf{n}^-. Finally the upwind and downwind fluxes:
uU = 0.5(\mathbf{u}\cdot \mathbf{n}+ |\mathbf{u}\cdot \mathbf{n}|)
uD = 0.5(\mathbf{u}\cdot \mathbf{n}- |\mathbf{u}\cdot \mathbf{n}|)

The time derivative is discretized using a second order backward difference scheme:
\int_K(\frac{1.5 c - 2 c^n + 0.5 c^{n-1}}{dt} )v dx
All this works to my satisfaction in the code below. As for the ALE implementation (which can be enabled by setting ALE = True), I am trying to discretize the time derivative in the reference/initial domain, and the rest of the terms in the current domain. Hence the following kinematic quantities are introduced: the gradient G of the ALE deformation wrt to X0, and the corresponding determinant J=det(G) for times n, n-1. From my math, I get:
\int_K J^n(\frac{1.5 J c - 2J^n c^n + 0.5 J^{n-1}c^{n-1}}{dt} )v dx
However, the solution doesn’t look right (the mesh motion seems to be polluting the solution.)
Can anyone help spot the error in this formulation and/or the implementation?

import dolfin
from dolfin import dot, div, grad, jump
ALE = False  # Turn ALE on or off
mesh = dolfin.RectangleMesh(dolfin.Point(0,0), dolfin.Point(2,2), 64,64)
# Trial and test functions for advected scalar c
Vc = dolfin.FunctionSpace(mesh, 'DG',0)
c = dolfin.TrialFunction(Vc)
d = dolfin.TestFunction(Vc)
cp = dolfin.Function(Vc)    # c at time t
cpp = dolfin.Function(Vc)   # c at time t-1

# initial condition for c
cp_initial = dolfin.Expression('(0.25 <= x[0] && x[0] <= 0.5 && 0.25 <= x[1] && x[1] <= 0.5) ? 1.0 : 0.0', degree = 1)
cp.assign(dolfin.interpolate(cp_initial, Vc))
cpp.assign(dolfin.interpolate(cp_initial, Vc))

# dummy convecting velocity - diagonal upwards at 45 deg
Vu = dolfin.VectorFunctionSpace(mesh, 'DG', 2)
u_conv = dolfin.interpolate(dolfin.Expression(('1','1'), degree =1), Vu)

# Second order backward difference time stepping coefficients
c1, c2, c3 = 3/2, -2, 1/2
dt = 0.01
t = 0  # start time
T = 0.5  # Total time
if ALE:
    # ALE setup
    # initial domain coordinates as dolfin func
    ale_x0 = dolfin.interpolate(dolfin.Expression(["x["+str(i)+"]" for i in range(2)], degree=1),
                  dolfin.VectorFunctionSpace(mesh, "CG", 1))
    G = dolfin.inv(dolfin.grad(ale_x0))

    # UFL determinant of ALE domain transformation for previous two time steps, initially all equal:
    J = dolfin.det(G)
    Jp = dolfin.det(G)
    Jpp = dolfin.det(G)

    Vu_mesh = dolfin.VectorFunctionSpace(mesh, 'CG', 1)

    u_mesh_cpp = dolfin.Expression(('cos(pi/2*x[1])*sin(pi/2*x[1])*pow(sin(pi/2*x[0]), 2.0)*sin(pi/2*t)*2', '-cos(pi/2*x[0])*sin(pi/2*x[0])*pow(sin(pi/2*x[1]), 2.0)*sin(pi/2*t)*2'), degree =1,pi=dolfin.pi, t=0)
    u_mesh = dolfin.interpolate(u_mesh_cpp, Vu)

else:
    u_mesh = dolfin.Function(Vu)
    u_mesh.vector()[:] = 0

# Output
xdmf_file = dolfin.XDMFFile('DG_advection.xdmf')
xdmf_file.parameters['flush_output'] = True
xdmf_file.parameters['rewrite_function_mesh'] = True
xdmf_file.parameters['functions_share_mesh'] = True
c_new = dolfin.Function(Vc)
c_new.rename('c','c'); u_mesh.rename('u_mesh', 'u_mesh')

while t <T:
    n = dolfin.FacetNormal(mesh)
    dS, dx = dolfin.dS(mesh), dolfin.dx(mesh)

    # Upstream and downstream normal velocities
    w_nU = (dot(u_conv - u_mesh, n) + abs(dot(u_conv - u_mesh, n))) / 2
    w_nD = (dot(u_conv - u_mesh, n) - abs(dot(u_conv - u_mesh, n))) / 2

    # Blending factor beta (0: pure upwind, 1: pure downwind)
    b = 0
    flux = (1 - b) * jump(c * w_nU) + b * jump(c * w_nD)

     # Discontinuous Galerkin implementation of the advection equation
    if  ALE:
        eq = (
            Jp*(J*c1 * c + Jp * c2 * cp + Jpp*c3 * cpp) / dt * d * dx
            - dot(c * u_conv, grad(d)) * dx
            + flux * jump(d) * dS
            )
    else:
        eq = (
            (c1 * c + c2 * cp + c3 * cpp) / dt * d * dx
            - dot(c * (u_conv - u_mesh), grad(d)) * dx
            + flux * jump(d) * dS
            )
    a, L = dolfin.system(eq)
    dolfin.solve(a==L, c_new)

    if ALE:
        u_mesh_cpp.t = t
        u_mesh.assign(dolfin.interpolate(u_mesh_cpp, Vu_mesh))
        mesh.bounding_box_tree().build(mesh)

        displacement = dolfin.Function(Vu_mesh)
        displacement.assign(u_mesh)
        displacement.vector()[:] *= dt

        dolfin.ALE.move(mesh, displacement)
        Jpp = Jp
        Jp = J
        G = dolfin.inv(dolfin.grad(ale_x0))
        J = dolfin.det(G)

    for func in [c_new, u_mesh]:
        xdmf_file.write(func,t)

    cpp.assign(cp)
    cp.assign(c_new)
    print(f't={t}')
    t+=dt

xdmf_file.close()