BC for Blockmatrix in time-loop

Thank you, I highly appreciate your quick answer!

I purposefully opted for these BC, IC and b-vector, for the sake of constructing an MWE.
You are right, the problem is convection dominated, which should be considered in the choice of parameters and time discretization. The b-vector is indeed divergence free, which I should have indicated.

Your last point is very insightful for me. Is there a rule of thumb to decide whether to go for a Mixed Element implementation, or one with separate Elements and a “monolithic” system matrix? My corrected code for reference:

import numpy as np
import ufl
from basix.ufl import element
from dolfinx import plot
from dolfinx.fem import (Constant, Function, dirichletbc, form, 
                         functionspace, locate_dofs_topological)
from dolfinx.fem.petsc import assemble_matrix_block,assemble_vector, create_vector_block,assemble_vector_block, create_vector
from dolfinx.mesh import CellType, create_rectangle, locate_entities_boundary
from dolfinx.mesh import *
from ufl import div, dx, ds, grad, inner, FacetNormal, dot, 
import pyvista
from mpi4py import MPI
from petsc4py import PETSc

# parameters
dt = 0.1
t = 0
T_end = 20*dt
num_steps = int(T_end/dt)

# mesh, spaces, functions
msh = create_unit_square(MPI.COMM_WORLD, 20,20)
x = ufl.SpatialCoordinate(msh)
n = FacetNormal(msh)
P1 = element("Lagrange", "triangle", 1) 
XV = functionspace(msh, P1) 
Xp = functionspace(msh, P1) 
V = ufl.TrialFunction(XV)
p = ufl.TrialFunction(Xp)
W = ufl.TestFunction(XV)
q = ufl.TestFunction(Xp)
V_old = Function(XV)
V_new = Function(XV)
p_old = Function(Xp)
p_new = Function(Xp)
bfield = Constant(msh, (0.1,0.0))

# init cond
def initcond_V(x):
    return (x[0]-0.5)**2 + (x[1]-0.5)**2 < 0.1**2
def initcond_p(x):
    return (x[0]-0.2)**2 + (x[1]-0.7)**2 < 0.1**2
V_old.interpolate(initcond_V)
p_old.interpolate(initcond_p)

# BC
facets = locate_entities_boundary(msh, dim=1,marker=lambda x: np.logical_or.reduce((
                                  np.isclose(x[0], 0.0),
                                  np.isclose(x[0], 1.0),
                                  np.isclose(x[1], 0.0),
                                  np.isclose(x[1], 1.0))))
V_dofs = locate_dofs_topological(XV, 1, facets)
p_dofs = locate_dofs_topological(Xp, 1, facets)
def BC_expressionV(x): return np.zeros(XV.dofmap.index_map.size_global)
def BC_expressionp(x): return np.zeros(Xp.dofmap.index_map.size_global)
BC_V = Function(XV)
BC_V.interpolate(BC_expressionV)
BC_p = Function(Xp)
BC_p.interpolate(BC_expressionp)

BCs = [dirichletbc(BC_V, V_dofs), dirichletbc(BC_p, p_dofs)] #?

# weak form implicit euler
M = inner(V,W)*dx
E = inner(bfield*p,grad(W))*dx
F = inner(V*bfield,grad(q))*dx
N = inner(p,q)*dx
a = form([[M, -dt/2*E],
          [-dt/2*F, N]])

# assembly
A = assemble_matrix_block(a, bcs=BCs)
A.assemble()
Vp_vec = A.createVecLeft()

# solver
solver = PETSc.KSP().create(msh.comm)
solver.setOperators(A)
solver.setType(PETSc.KSP.Type.PREONLY)
solver.getPC().setType(PETSc.PC.Type.LU)

for i in range(num_steps):
    t += dt

    # Update the right hand side
    l = form([inner(grad(V_old), grad(W)) * dx,
            inner(grad(p_old), grad(q)) * dx])
    L = assemble_vector_block(l,a,bcs=BCs)

    # Solve linear problem
    solver.solve(L, Vp_vec)

    # extract solution
    offset = XV.dofmap.index_map.size_global
    V_new.x.array[:] = Vp_vec.array_r[:offset]
    V_new.x.scatter_forward()
    p_new.x.array[:] = Vp_vec.array_r[offset:]
    p_new.x.scatter_forward()

    # Update solution at previous time step
    V_old.x.array[:] = V_new.x.array
    p_old.x.array[:] = p_new.x.array

In the Navier-Stokes and the Stokes demo examples, I could see the assembly of the system matrix - but only when having Dirichlet BC solely on the first unknown. How can I achieve this for the case with BC on both unknowns? The method I am using above, BCs = [dirichletbc(BC_V, V_dofs), dirichletbc(BC_p, p_dofs)], seems to be wrong to me.