Residual: nan when using "lu" solver

Hello, I am facing an error while implementing this tutorial in 3D by modifying the degree of u and stress and boundary conditions, when I increase my mesh density say 10 or more, it runs out of memory, and when I keep mesh density too low to make it work, it returns Residual: nan.
I have tries “umfpack” which gives same result which is Residual: nan and on finer mesh, runs out of memory. When I use “mumps” there is this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_4680/2031601198.py in <module>
     12     niter = 0
     13     while nRes/nRes0 > tol and niter < Nitermax:
---> 14         solve(A, du.vector(), Res, "mumps")
     15         Du.assign(Du+du)
     16         deps = eps(Du)

/usr/lib/petsc/lib/python3/dist-packages/dolfin/fem/solving.py in solve(*args, **kwargs)
    238             raise RuntimeError("Not expecting keyword arguments when solving linear algebra problem.")
    239 
--> 240         return dolfin.la.solver.solve(*args)
    241 
    242 

/usr/lib/petsc/lib/python3/dist-packages/dolfin/la/solver.py in solve(A, x, b, method, preconditioner)
     70     """
     71 
---> 72     return cpp.la.solve(A, x, b, method, preconditioner)

RuntimeError: 

*** -------------------------------------------------------------------------
*** DOLFIN encountered an error. If you are not able to resolve this issue
*** using the information listed below, you can ask for help at
***
***     fenics-support@googlegroups.com
***
*** Remember to include the error message listed below and, if possible,
*** include a *minimal* running example to reproduce the error.
***
*** -------------------------------------------------------------------------
*** Error:   Unable to solve linear system using PETSc Krylov solver.
*** Reason:  Solution failed to converge in 0 iterations (PETSc reason DIVERGED_PC_FAILED, residual norm ||r|| = 0.000000e+00).
*** Where:   This error was encountered inside PETScKrylovSolver.cpp.
*** Process: 0
*** 
*** DOLFIN version: 2019.2.0.dev0
*** Git changeset:  ubuntu
*** -------------------------------------------------------------------------

I looked upon the web and found out in this post that at first iteration the solution has been found. I have no clue what should I use to get this code working.
Here is minimum working example -

from dolfin import *
import numpy as np
parameters["form_compiler"]["representation"] = 'quadrature'
import warnings
from ffc.quadrature.deprecation import QuadratureRepresentationDeprecationWarning
warnings.simplefilter("once", QuadratureRepresentationDeprecationWarning)

E = Constant(70e3)
nu = Constant(0.3)
lmbda = E*nu/(1+nu)/(1-2*nu)
mu = E/2./(1+nu)
sig0 = Constant(250.) 
Et = E/100.
H = E*Et/(E-Et) 

from mshr import *
cube = Box(Point(0, 0, 0),Point(1, 1, 1))
domain = cube
mesh = generate_mesh(domain, 5)
plot(mesh);

deg_u = 3
deg_stress = 3
V = VectorFunctionSpace(mesh, "CG", deg_u)
We = VectorElement("Quadrature", mesh.ufl_cell(), degree=deg_stress, dim=4, quad_scheme='default')
W = FunctionSpace(mesh, We)
W0e = FiniteElement("Quadrature", mesh.ufl_cell(), degree=deg_stress, quad_scheme='default')
W0 = FunctionSpace(mesh, W0e)

sig = Function(W)
sig_old = Function(W)
n_elas = Function(W)
beta = Function(W0)
p = Function(W0, name="Cumulative plastic strain")
u = Function(V, name="Total displacement")
du = Function(V, name="Iteration correction")
Du = Function(V, name="Current increment")
v = TrialFunction(V)
u_ = TestFunction(V)
P0 = FunctionSpace(mesh, "DG", 0)
p_avg = Function(P0, name="Plastic strain")

class bottom(SubDomain):
    def inside(self, x, on_boundary):
        return on_boundary and near(x[1], 0., 0.01)
    
class top(SubDomain):
    def inside(self, x, on_boundary):
        return on_boundary and near(x[1], 1., 0.01)

boundaries = MeshFunction("size_t", mesh, mesh.geometry().dim() - 1)

bottom().mark(boundaries, 1)
top().mark(boundaries, 2)

q_lim = float(sig0)
loading = Expression("q*t", q=q_lim, t=0, degree=2)

bc_bottom = DirichletBC(V, Constant((0., 0., 0.)), boundaries, 1)

ds = Measure("ds")(subdomain_data=boundaries)
n = FacetNormal(mesh)

bc_u = [bc_bottom]

def F_ext(v):
    return loading*dot(n, v)*ds(2)

def eps(v):
    e = sym(grad(v))
    return as_tensor([[e[0, 0], e[0, 1], 0],
                      [e[0, 1], e[1, 1], 0],
                      [0, 0, 0]])
def sigma(eps_el):
    return lmbda*tr(eps_el)*Identity(3) + 2*mu*eps_el
def as_3D_tensor(X):
    return as_tensor([[X[0], X[3], 0],
                      [X[3], X[1], 0],
                      [0, 0, X[2]]])

ppos = lambda x: (x+abs(x))/2.
def proj_sig(deps, old_sig, old_p):
    sig_n = as_3D_tensor(old_sig)
    sig_elas = sig_n + sigma(deps)
    s = dev(sig_elas)
    sig_eq = sqrt(3/2.*inner(s, s))
    f_elas = sig_eq - sig0 - H*old_p
    dp = ppos(f_elas)/(3*mu+H)
    n_elas = s/sig_eq*ppos(f_elas)/f_elas
    beta = 3*mu*dp/sig_eq
    new_sig = sig_elas-beta*s
    return as_vector([new_sig[0, 0], new_sig[1, 1], new_sig[2, 2], new_sig[0, 1]]), \
           as_vector([n_elas[0, 0], n_elas[1, 1], n_elas[2, 2], n_elas[0, 1]]), \
           beta, dp

def sigma_tang(e):
    N_elas = as_3D_tensor(n_elas)
    return sigma(e) - 3*mu*(3*mu/(3*mu+H)-beta)*inner(N_elas, e)*N_elas-2*mu*beta*dev(e)

metadata = {"quadrature_degree": deg_stress, "quadrature_scheme": "default"}
dxm = dx(metadata=metadata)

a_Newton = inner(eps(v), sigma_tang(eps(u_)))*dxm
res = -inner(eps(u_), as_3D_tensor(sig))*dxm + F_ext(u_)

def local_project(v, V, u=None):
    dv = TrialFunction(V)
    v_ = TestFunction(V)
    a_proj = inner(dv, v_)*dxm
    b_proj = inner(v, v_)*dxm
    solver = LocalSolver(a_proj, b_proj)
    solver.factorize()
    if u is None:
        u = Function(V)
        solver.solve_local_rhs(u)
        return u
    else:
        solver.solve_local_rhs(u)
        return

Nitermax, tol = 200, 1e-8  
Nincr = 20
load_steps = np.linspace(0, 1.1, Nincr+1)[1:]**0.5
results = np.zeros((Nincr+1, 2))
for (i, t) in enumerate(load_steps):
    loading.t = t
    A, Res = assemble_system(a_Newton, res, bc_u)
    nRes0 = Res.norm("l2")
    nRes = nRes0
    Du.interpolate(Constant((0, 0, 0)))
    print("Increment:", str(i+1))
    niter = 0
    while nRes/nRes0 > tol and niter < Nitermax:
        solve(A, du.vector(), Res, "lu")
        Du.assign(Du+du)
        deps = eps(Du)
        sig_, n_elas_, beta_, dp_ = proj_sig(deps, sig_old, p)
        local_project(sig_, W, sig)
        local_project(n_elas_, W, n_elas)
        local_project(beta_, W0, beta)
        A, Res = assemble_system(a_Newton, res, bc_u)
        nRes = Res.norm("l2")
        print("    Residual:", nRes)
        niter += 1
    u.assign(u+Du)
    p.assign(p+local_project(dp_, W0))
    sig_old.assign(sig)
    p_avg.assign(project(p, P0))
    results[i+1, :] = (u(0.5, 1., 0.)[1], t)

It works fine on 2d mesh but I don’t know how to make it work in 3d.
Kindly give some suggestions on the same.
Thank You.