# ruff: noqa: F401, I001
import matplotlib.pyplot as plt
import control as ct  # used for sim and analysis
import gymnasium as gym  # use for simulating the cartpole
import numpy as np  # useful python lib for linalg
from IPython.display import display
from scipy.linalg import expm  # used for computing gramian
from scipy.integrate import quad_vec
from akreon_cartpole import cartpole, list_controllers
from akreon_cartpole.controls import SolverConfig
from akreon_cartpole.diagnostics import (
    plot_poles,
    plot_state_step_responses,
    plot_state_trajectories,
    plot_shadow_price_summary,
    plot_rollout_states,
    show_rollout_frames,
)  # nice visualization / plotting util

# list all compiled controllers for cartpole
# `ctrls` is indexed by case name below, so iterate its entries directly;
# the enumerate() index was never used.
ctrls = list_controllers()
for case in ctrls:
    print(f"Case {case} -> {ctrls[case]}")

Case cartpole.nominal_feedback -> ['state_feedback', 'polytopic_state_feedback']
Case cartpole.nominal_observer -> ['state_observer', 'polytopic_state_observer']
Case cartpole.disturbance_feedback -> ['h2_state_feedback', 'hinf_state_feedback', 'h2_polytopic_state_feedback', 'hinf_polytopic_state_feedback']
Case cartpole.disturbance_observer -> ['h2_state_observer', 'hinf_state_observer', 'h2_polytopic_state_observer', 'hinf_polytopic_state_observer']
Case cartpole.mixed_sensitivity_feedback -> ['hinf_mixed_sensitivity']

# (1) acquire params of the cartpole from gym
# Plot labels for the four states, in the order used by the state vector.
STATE_LABELS = [
    "Cart Position\n$x$",
    "Cart Velocity\n$\\dot{x}$",
    "Pendulum Angle\n$\\theta$",
    "Angular Rate\n$\\dot{\\theta}$",
]

# A throwaway environment is created only to read its physical constants.
env = gym.make("CartPole-v1")
cartpole_model = env.unwrapped
mc, mp = cartpole_model.masscart, cartpole_model.masspole
pole_length = cartpole_model.length
gravity = cartpole_model.gravity
CARTPOLE_DT = cartpole_model.tau  # simulation step, reused for discretization
env.close()

# (2) set up the linear state-space model derived from the
# linearized Euler-Lagrange equations from the dynamics video.
# Rows/columns follow the STATE_LABELS order: x, x_dot, theta, theta_dot.
A = np.zeros((4, 4), dtype=np.float64)
A[0, 1] = 1.0
A[1, 2] = -(mp / mc) * gravity
A[2, 3] = 1.0
A[3, 2] = ((mp + mc) / (mc * pole_length)) * gravity

# The single input enters the cart-velocity and angular-rate equations.
B = np.zeros((4, 1), dtype=np.float64)
B[1, 0] = 1.0 / mc
B[3, 0] = -1.0 / (mc * pole_length)

# Measured outputs: cart position (state 0) and pendulum angle (state 2).
C = np.zeros((2, 4), dtype=np.float64)
C[0, 0] = 1.0
C[1, 2] = 1.0
D = np.zeros((2, 1), dtype=np.float64)

cartpole_ss = ct.ss(A, B, C, D)

for matrix_name, matrix_value in (("A", A), ("B", B), ("C", C), ("D", D)):
    print(f"{matrix_name} = \n{matrix_value}")

A = 
[[ 0.    1.    0.    0.  ]
 [ 0.    0.   -0.98  0.  ]
 [ 0.    0.    0.    1.  ]
 [ 0.    0.   21.56  0.  ]]
B = 
[[ 0.]
 [ 1.]
 [ 0.]
 [-2.]]
C = 
[[1. 0. 0. 0.]
 [0. 0. 1. 0.]]
D = 
[[0.]
 [0.]]

# (3) perform open-loop analysis of the cartpole system
# (3.a) analyze the poles of the open loop system
# A pole with positive real part means the linearized plant is unstable
# without feedback.
open_loop_poles = ct.poles(cartpole_ss)
print(f"Open loop poles: {open_loop_poles}")
plot_poles(open_loop_poles, title="Open Loop Cartpole Poles")

Open loop poles: [ 0.        +0.j  0.        +0.j  4.64327471+0.j -4.64327471+0.j]

# (3.b) determine if the system is controllable

# part 1 - controllability matrix C = [ B AB A^2B ... A^(n-1)B ]
# (by Cayley-Hamilton, powers of A beyond n-1 add no new directions)
Ctrb = ct.ctrb(A, B)
print(f"Controllability matrix = \n{Ctrb}")

# Full rank (equal to the number of states) means the pair (A, B) is controllable.
rank = np.linalg.matrix_rank(Ctrb)
print(f"Controllability rank: {rank}")

Controllability matrix = 
[[  0.     1.     0.     1.96]
 [  1.     0.     1.96   0.  ]
 [  0.    -2.     0.   -43.12]
 [ -2.     0.   -43.12   0.  ]]
Controllability rank: 4

# part 2 - controllability gramian
def ctrb_gram_integrand(t):
    """Return the controllability-Gramian integrand at time ``t``.

    Evaluates ``e^{At} B B^T e^{A^T t}`` using the module-level ``A`` and ``B``.
    """
    state_transition = expm(A * t)
    input_response = state_transition @ B
    return input_response @ B.T @ state_transition.T


# Sweep the integration horizon and record the Gramian spectrum at each one.
tf = np.arange(0.25, 2.25, 0.25)
W_c_eigs_sweep, W_c_dominant_state_impact = [], []
for t in tf:
    W_c = quad_vec(ctrb_gram_integrand, 0, t)[0]  # drop the error estimate
    W_c = (W_c + W_c.T) / 2.0  # ensure W_c is symmetric
    W_c_eigs, W_c_vecs = np.linalg.eigh(W_c)  # eigenvalues in ascending order
    dominant_direction = W_c_vecs[:, -1]
    W_c_eigs_sweep.append(W_c_eigs)
    # Per-state share of the largest eigen-direction: lambda_max * v_max**2.
    W_c_dominant_state_impact.append(W_c_eigs[-1] * dominant_direction**2)

W_c_eigs_sweep = np.asarray(W_c_eigs_sweep)
W_c_dominant_state_impact = np.asarray(W_c_dominant_state_impact)


def plot_gramian_modes(tf, eigs, state_impact):
    """Plot a Gramian eigenvalue sweep next to a dominant-mode heat map.

    Args:
        tf: 1-D sequence of integration horizons (seconds).
        eigs: 2-D array, one row per horizon and one column per eigenvalue.
        state_impact: 2-D array, one row per horizon and one column per
            state, holding ``lambda_max * v_max**2``.

    The left panel draws every eigenvalue on a log axis; the right panel
    shows ``log10(state_impact)`` with states on the y-axis. The figure is
    shown immediately and nothing is returned.
    """
    title_style = {"fontsize": 12, "fontweight": "bold", "pad": 10}
    horizon_label = r"Integration Horizon $t$ (sec)"

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    ax_eigs, ax_mode = axes

    # Left panel: one curve per eigenvalue across the horizon sweep.
    for mode_number, eig_track in enumerate(eigs.T, start=1):
        ax_eigs.semilogy(
            tf, eig_track, label=rf"$\lambda_{mode_number}$", linewidth=2.5
        )
    ax_eigs.set_title("Gramian Eigenvalues vs Time Horizon", **title_style)
    ax_eigs.set_xlabel(horizon_label, fontsize=11)
    ax_eigs.set_ylabel("Eigenvalue Magnitude (log scale)", fontsize=11)
    ax_eigs.grid(True, which="both", ls="--", alpha=0.5)
    ax_eigs.legend(loc="upper left", fontsize=10)

    # Right panel: states on rows, horizons on columns, log-scaled colors.
    heat_values = np.log10(state_impact.T)
    im = ax_mode.imshow(heat_values, aspect="auto", origin="lower", cmap="viridis")
    ax_mode.set_title(r"Dominant Mode: $\lambda_{max} v_{max}^2$", **title_style)
    ax_mode.set_xlabel(horizon_label, fontsize=11)
    ax_mode.set_ylabel("State", fontsize=11)
    horizon_ticks = [f"{t:.2f}" for t in tf]
    ax_mode.set_xticks(np.arange(len(tf)))
    ax_mode.set_xticklabels(horizon_ticks, rotation=45, ha="right")
    ax_mode.set_yticks(np.arange(len(STATE_LABELS)))
    ax_mode.set_yticklabels(STATE_LABELS)
    fig.colorbar(im, ax=ax_mode, label="log10 dominant mode contribution")

    plt.tight_layout()
    plt.show()


# Visualize how the controllability Gramian spectrum changes with the horizon.
plot_gramian_modes(tf, W_c_eigs_sweep, W_c_dominant_state_impact)

def solver_metric(result, name):
    """Return ``result.<name>``, or NaN when the attribute is missing."""
    try:
        return getattr(result, name)
    except AttributeError:
        return np.nan


def print_solver_diagnostics(result):
    """Print a fixed-format summary of a solver result.

    ``status``, ``event``, ``stalled`` and ``num_iter`` are read directly
    from ``result``. The numeric metrics go through ``solver_metric`` so a
    missing attribute prints as ``nan`` instead of raising.
    """
    print("Akreon solver")
    header_fields = (
        ("status", "status"),
        ("event", "event"),
        ("stalled", "stalled"),
        ("iterations", "num_iter"),
    )
    for label, attribute in header_fields:
        print(f"  {label}: {getattr(result, attribute)}")

    # Collect every metric before printing any of them.
    metric_names = (
        "normalized_duality_gap",
        "primal_objective_value",
        "dual_objective_value",
        "primal_residual_norm",
        "dual_residual_norm",
        "equality_residual_norm",
    )
    metrics = {metric: solver_metric(result, metric) for metric in metric_names}

    objective_rows = (
        ("mu", "normalized_duality_gap"),
        ("primal objective", "primal_objective_value"),
        ("dual objective", "dual_objective_value"),
    )
    for label, metric in objective_rows:
        print(f"  {label}: {metrics[metric]:.3e}")

    print("  residual norms:")
    residual_rows = (
        ("primal", "primal_residual_norm"),
        ("dual", "dual_residual_norm"),
        ("nu", "equality_residual_norm"),
    )
    for label, metric in residual_rows:
        print(f"    {label}: {metrics[metric]:.3e}")


# Default solver settings with an explicit global_reg_eps value.
config = SolverConfig()
config.global_reg_eps = 1e-8

# Synthesize a static state-feedback gain from the plant matrices (A, B).
# NOTE(review): the closed loop is later formed as A + B @ K, so K is
# presumably returned in the u = K x sign convention - confirm with the solver docs.
state_feedback_result = cartpole.solve_state_feedback(A, B, config)
K = np.asarray(state_feedback_result.K)

print_solver_diagnostics(state_feedback_result)
print(f"K = \n{K}")

Akreon solver
  status: OPTIMAL
  event: NONE
  stalled: False
  iterations: 11
  mu: 3.562e-06
  primal objective: 4.056e-05
  dual objective: -4.067e-05
  residual norms:
    primal: 1.000e-10
    dual: 1.000e-10
    nu: 2.225e-308
K = 
[[ 3.96738748  6.73230129 57.74818222 10.55240291]]

# Closed-loop state matrix under u = K x.
A_cl = A + B @ K

# Stable only when every closed-loop pole is strictly in the left half-plane.
poles = np.linalg.eigvals(A_cl)
is_stable = (poles.real < 0.0).all()

print(f"Closed loop poles: {poles}")
print(f"Closed loop stability: {is_stable}")
plot_poles(poles, title="Closed-loop SF poles")

Closed loop poles: [-6.30619348+5.18016169j -6.30619348-5.18016169j -0.88005878+0.62692769j
 -0.88005878-0.62692769j]
Closed loop stability: True

# part 1 - observability matrix O = [ C; CA; CA^2; ...; CA^(n-1) ]
# (by Cayley-Hamilton, powers of A beyond n-1 add no new directions)
Obsv = ct.obsv(A, C)
print(f"Observability matrix = \n{Obsv}")

# Full column rank (equal to the number of states) means (A, C) is observable.
rank = np.linalg.matrix_rank(Obsv)
print(f"Observability rank: {rank}")

Observability matrix = 
[[ 1.    0.    0.    0.  ]
 [ 0.    0.    1.    0.  ]
 [ 0.    1.    0.    0.  ]
 [ 0.    0.    0.    1.  ]
 [ 0.    0.   -0.98  0.  ]
 [ 0.    0.   21.56  0.  ]
 [ 0.    0.    0.   -0.98]
 [ 0.    0.    0.   21.56]]
Observability rank: 4

# part 2 - Observability Gramian
def obsv_gram_integrand(t):
    """Return the observability-Gramian integrand at time ``t``.

    Evaluates ``e^{A^T t} C^T C e^{At}`` using the module-level ``A`` and
    ``C``. The transposes sit on the outside-left factor: this is the dual
    of the controllability integrand (A -> A^T, B -> C^T). The previous
    ordering ``e^{At} C^T C e^{A^T t}`` computed the controllability Gramian
    of the pair (A, C^T) instead.
    """
    eAt = expm(A * t)
    return eAt.T @ C.T @ C @ eAt


# Sweep the integration horizon and record the Gramian spectrum at each one.
tf = np.arange(0.25, 2.25, 0.25)
W_o_eigs_sweep, W_o_dominant_state_impact = [], []
for t in tf:
    W_o = quad_vec(obsv_gram_integrand, 0, t)[0]  # drop the error estimate
    W_o = (W_o + W_o.T) / 2.0  # ensure W_o is symmetric
    W_o_eigs, W_o_vecs = np.linalg.eigh(W_o)  # eigenvalues in ascending order
    dominant_direction = W_o_vecs[:, -1]
    W_o_eigs_sweep.append(W_o_eigs)
    # Per-state share of the largest eigen-direction: lambda_max * v_max**2.
    W_o_dominant_state_impact.append(W_o_eigs[-1] * dominant_direction**2)

W_o_eigs_sweep = np.asarray(W_o_eigs_sweep)
W_o_dominant_state_impact = np.asarray(W_o_dominant_state_impact)

plot_gramian_modes(tf, W_o_eigs_sweep, W_o_dominant_state_impact)

# Synthesize an observer gain L from (A, C), reusing the solver config above.
# The estimation-error dynamics are evaluated afterwards as A - L @ C.
state_observer_result = cartpole.solve_state_observer(A, C, config)
L = np.asarray(state_observer_result.L)

print_solver_diagnostics(state_observer_result)
print(f"L = \n{L}")

Akreon solver
  status: OPTIMAL
  event: NONE
  stalled: False
  iterations: 12
  mu: 7.300e-06
  primal objective: 7.444e-05
  dual objective: -7.460e-05
  residual norms:
    primal: 1.000e-10
    dual: 1.000e-10
    nu: 2.225e-308
L = 
[[ 1.14048044 -0.13943634]
 [ 1.27636064 -1.38239484]
 [ 0.04832205  2.77428729]
 [-0.293922   31.32871047]]

# Estimation-error dynamics: e_dot = (A - L C) e.
A_obs = A - L @ C

# The error decays only when every pole is strictly in the left half-plane.
obs_poles = np.linalg.eigvals(A_obs)
is_obs_stable = (obs_poles.real < 0.0).all()

print(f"Closed loop poles: {obs_poles}")
print(f"Closed loop stability: {is_obs_stable}")
plot_poles(obs_poles, title="Observer Error Dynamics Poles")

Closed loop poles: [-0.56989489+0.96773542j -0.56989489-0.96773542j -1.38748897+2.80473174j
 -1.38748897-2.80473174j]
Closed loop stability: True

# Observer-based controller as a state-space system from measurement y to
# command u:  x_hat_dot = (A + B K - L C) x_hat + L y,  u = K x_hat.
obs_ss = ct.ss(A + B @ K - L @ C, L, K, np.zeros((K.shape[0], C.shape[0])))
# Discretize at the CartPole simulation step so it can run once per env step.
obs_ctrl = ct.c2d(obs_ss, CARTPOLE_DT)

print(f"Observer Controller \n{obs_ctrl}")

Observer Controller 
<StateSpace>: sys[61]$sampled
Inputs (2): ['u[0]', 'u[1]']
Outputs (1): ['y[0]']
States (4): ['x[0]', 'x[1]', 'x[2]', 'x[3]']
dt = 0.02

A = [[ 9.77893408e-01  2.09867359e-02  1.28181169e-02  1.99374108e-03]
     [ 4.09368084e-02  1.11660936e+00  9.57187756e-01  1.92307153e-01]
     [-2.25349305e-03 -2.41612641e-03  9.24210079e-01  1.55465987e-02]
     [-1.26378820e-01 -2.33537264e-01 -2.08763982e+00  6.13833265e-01]]

B = [[ 0.02281965 -0.00234998]
     [ 0.02751605  0.06119996]
     [ 0.0008406   0.05890087]
     [-0.01045097  0.44374051]]

C = [[ 3.96738748  6.73230129 57.74818222 10.55240291]]

D = [[0. 0.]]

# LQR weights: pendulum angle is weighted 5x and angular rate 10x relative
# to the cart states; the single input has unit weight.
Q = np.diag(np.array([1.0, 1.0, 5.0, 10.0], dtype=np.float64))
R = np.array([[1.0]])
K_lqr, _, _ = ct.lqr(A, B, Q, R)

# ct.lqr gains follow u = -K x, hence the minus sign in the closed loop.
A_cl_lqr = A - B @ K_lqr

poles_lqr = np.linalg.eigvals(A_cl_lqr)
is_lqr_stable = (poles_lqr.real < 0.0).all()

print(f"Closed loop poles: {poles_lqr}")
print(f"Closed loop stability: {is_lqr_stable}")
plot_poles(poles_lqr, title="Closed-loop LQR Poles")

Closed loop poles: [-8.79812861+0.j        -2.43804425+0.j        -0.80566116+0.5144447j
 -0.80566116-0.5144447j]
Closed loop stability: True

# Estimator design by duality: solve the LQR problem for (A^T, C^T) and
# transpose the resulting gain. Q is reused as the state weight and the
# two measurements get unit weight.
R_lqe = np.identity(2, dtype=np.float64)

K_dual, _, _ = ct.lqr(A.T, C.T, Q, R_lqe)

L_lqe = np.transpose(K_dual)
A_cl_lqe = A - L_lqe @ C

poles_lqe = np.linalg.eigvals(A_cl_lqe)
is_lqe_stable = (poles_lqe.real < 0.0).all()

print(f"LQE poles: {poles_lqe}")
print(f"LQE stability: {is_lqe_stable}")
plot_poles(poles_lqe, title="Closed-loop LQE Poles")

LQE poles: [-0.87034324+0.50302591j -0.87034324-0.50302591j -3.7207433 +0.j
 -5.85381454+0.j        ]
LQE stability: True

# LQG compensator from measurement y to command u:
#   x_hat_dot = (A - B K_lqr - L_lqe C) x_hat + L_lqe y,   u = -K_lqr x_hat
# K_lqr follows the u = -K x convention (see A - B @ K_lqr), so the output
# matrix must be -K_lqr; this matches the "LQG" entry built in
# dynamic_compensator_specs(). D is sized from K_lqr rather than the
# unrelated LMI gain K. Re-run the cell: the printed C matrix changes sign.
lqg_ss = ct.ss(
    A - B @ K_lqr - L_lqe @ C,
    L_lqe,
    -K_lqr,
    np.zeros((K_lqr.shape[0], C.shape[0])),
)
# Discretize at the CartPole simulation step.
lqg_ctrl = ct.c2d(lqg_ss, CARTPOLE_DT)

print(f"LQG Controller \n{lqg_ctrl}")

LQG Controller 
<StateSpace>: sys[63]$sampled
Inputs (2): ['u[0]', 'u[1]']
Outputs (1): ['y[0]']
States (4): ['x[0]', 'x[1]', 'x[2]', 'x[3]']
dt = 0.02

A = [[ 9.65637182e-01  2.00676146e-02  1.05624499e-02  1.45914907e-03]
     [-6.60117521e-04  1.04017005e+00  4.89334818e-01  1.38327892e-01]
     [ 5.13948426e-03 -7.32335816e-04  8.12274347e-01  1.55062747e-02]
     [-1.34377593e-02 -8.03784144e-02 -1.33982498e+00  7.19960854e-01]]

B = [[ 0.0345435  -0.00485096]
     [ 0.01822651  0.07500977]
     [-0.0054832   0.18054918]
     [-0.0216484   0.60409861]]

C = [[ -1.          -2.28725542 -31.51453309  -7.5673753 ]]

D = [[0. 0.]]

# Fresh solver config with looser (1e-3) feasibility and duality-gap tolerances.
config = SolverConfig()
config.primal_feas_tol = 1e-3
config.dual_feas_tol = 1e-3
config.duality_gap_tol = 1e-3

# The disturbance enters through the same channel as the control input.
B_tot = np.column_stack((B, B))  # [ B_w, B_u ]

# Scaling Q and R by the same factor preserves the Q:R trade-off (and hence
# the optimal gain) while improving solver conditioning.
cost_scale = 0.5
Q_h2 = cost_scale * Q
R_h2 = cost_scale * R

# Performance factors satisfy C_z^T C_z = Q_h2 and D_zu^T D_zu = R_h2
# (np.linalg.cholesky returns the lower factor, so transpose to get the upper one).
Q_sqrt = np.linalg.cholesky(Q_h2).T
R_sqrt = np.linalg.cholesky(R_h2).T
# Performance output z stacks the weighted state over the weighted input.
C_z = np.vstack((Q_sqrt, np.zeros((1, A.shape[0]))))
D_zw = np.zeros((C_z.shape[0], B.shape[1]), dtype=np.float64)
D_zu = np.vstack((np.zeros((Q.shape[0], B.shape[1])), R_sqrt))
D_z = np.hstack((D_zw, D_zu))  # columns ordered like B_tot: [ w, u ]

h2_feedback_result = cartpole.solve_h2_state_feedback(A, B_tot, C_z, D_z, config=config)

print_solver_diagnostics(h2_feedback_result)
K_h2 = np.asarray(h2_feedback_result.K)
print(f"K=\n{K_h2}")

Akreon solver
  status: OPTIMAL
  event: NONE
  stalled: False
  iterations: 44
  mu: 6.599e-04
  primal objective: 6.436e+00
  dual objective: 6.410e+00
  residual norms:
    primal: 1.091e-04
    dual: 8.458e-05
    nu: 2.225e-308
K=
[[ 0.9487866   2.17299042 31.17546501  7.05884828]]

# Closed-loop state matrix under the H2 gain (u = K_h2 x).
A_h2_cl = A + B @ K_h2
h2_poles = np.linalg.eigvals(A_h2_cl)
h2_all_left_half_plane = (h2_poles.real < 0.0).all()

print(f"H2 CL poles: {h2_poles}")
print(f"H2 CL stable: {h2_all_left_half_plane}")
plot_poles(h2_poles, title="Closed-loop H2 Poles")

H2 CL poles: [-7.16065034+0.j         -3.34725784+0.j         -0.71839898+0.50966873j
 -0.71839898-0.50966873j]
H2 CL stable: True

# Disturbance input map: one column for the process disturbance (through B)
# plus one zero column per measurement channel (sensor noise does not drive
# the state directly).
B_w_obs = np.column_stack((B, np.zeros((A.shape[0], C.shape[0]))))
# Stacked outputs: the weighted states (Q_sqrt) on top of the measurements (C).
C_obs_h2 = np.vstack((Q_sqrt, C))

# Sensor noise feeds only the measurement rows, via the last C.shape[0]
# disturbance columns, scaled by sensor_noise_scale.
sensor_noise_scale = 0.1
D_obs_h2 = np.zeros((C_obs_h2.shape[0], B_w_obs.shape[1]), dtype=np.float64)
D_obs_h2[Q_sqrt.shape[0] :, 1:] = sensor_noise_scale * np.eye(C.shape[0])

observer_result = cartpole.solve_h2_state_observer(
    A, B_w_obs, C_obs_h2, D_obs_h2, config=config
)
print_solver_diagnostics(observer_result)
L_h2 = np.asarray(observer_result.L)
print(f"L=\n{L_h2}")

# Estimation-error dynamics under the H2 observer gain.
observer_poles_h2 = np.linalg.eigvals(A - L_h2 @ C)
print(f"H2 observer poles: {observer_poles_h2}")
plot_poles(observer_poles_h2, title="H2 Observer Error Poles")

Akreon solver
  status: OPTIMAL
  event: NONE
  stalled: False
  iterations: 27
  mu: 1.024e-04
  primal objective: 1.491e+01
  dual objective: 1.491e+01
  residual norms:
    primal: 2.958e-05
    dual: 1.000e-10
    nu: 2.225e-308
L=
[[ 3.94636531 -1.0465457 ]
 [ 8.43868843 -6.37350229]
 [-1.10736722  9.92480245]
 [-8.25328051 49.81940034]]
H2 observer poles: [-1.98171955+1.68443026j -1.98171955-1.68443026j -4.95386433+2.03275818j
 -4.95386433-2.03275818j]

# H2 dynamic compensator from measurement y to command u:
#   x_hat_dot = (A + B K_h2 - L_h2 C) x_hat + L_h2 y,   u = K_h2 x_hat
h2_compensator_ss = ct.ss(
    A + B @ K_h2 - L_h2 @ C,
    L_h2,
    K_h2,
    np.zeros((K_h2.shape[0], C.shape[0])),
)
h2_compensator_poles = ct.poles(h2_compensator_ss)
# Discretize at the CartPole simulation step.
h2_obs_ctrl = ct.c2d(h2_compensator_ss, CARTPOLE_DT)

# Nominal plant and compensator interconnected, with state [x; x_hat].
A_h2_augmented = np.block(
    [
        [A, B @ K_h2],
        [L_h2 @ C, A + B @ K_h2 - L_h2 @ C],
    ]
)
h2_augmented_poles = np.linalg.eigvals(A_h2_augmented)

print(f"H2 compensator poles: {h2_compensator_poles}")
print(
    "H2 nominal plant + compensator stable: "
    f"{np.all(np.real(h2_augmented_poles) < 0.0)}"
)
print(f"H2 nominal plant + compensator poles: {h2_augmented_poles}")
print(f"H2 observer controller \n{h2_obs_ctrl}")

display(plot_poles(h2_compensator_poles, title="H2 Dynamic Compensator Poles"))
display(plot_poles(h2_augmented_poles, title="H2 Nominal Plant + Compensator Poles"))

H2 compensator poles: [-11.18546381+9.74633041j -11.18546381-9.74633041j
  -1.72247314+1.28690556j  -1.72247314-1.28690556j]
H2 nominal plant + compensator stable: True
H2 nominal plant + compensator poles: [-4.95386433+2.03275818j -4.95386433-2.03275818j -0.71839898+0.50966873j
 -0.71839898-0.50966873j -1.98171955+1.68443026j -1.98171955-1.68443026j
 -3.34725784+0.j         -7.16065034+0.j        ]
H2 observer controller 
<StateSpace>: sys[65]$sampled
Inputs (2): ['u[0]', 'u[1]']
Outputs (1): ['y[0]']
States (4): ['x[0]', 'x[1]', 'x[2]', 'x[3]']
dt = 0.02

A = [[ 9.22980462e-01  1.96084451e-02  2.41147895e-02  1.48340213e-03]
     [-1.31936337e-01  1.03708796e+00  5.61283988e-01  1.31101382e-01]
     [ 2.02599907e-02 -5.38234035e-04  8.05638511e-01  1.56116183e-02]
     [ 9.44366483e-02 -7.58129964e-02 -1.43975747e+00  7.34738666e-01]]

B = [[ 7.71876781e-02 -1.85654035e-02]
     [ 1.48730698e-01 -4.28705678e-04]
     [-2.05862586e-02  1.87311042e-01]
     [-1.27995843e-01  7.10573322e-01]]

C = [[ 0.9487866   2.17299042 31.17546501  7.05884828]]

D = [[0. 0.]]

# Pendulum-mass uncertainty: +/-15% around the nominal mass gives the two
# vertices of the polytope.
mp_uncertainty = 0.15
mp_vertices = mp * np.array([1.0 - mp_uncertainty, 1.0 + mp_uncertainty])


def cartpole_A_mp(mp_hat):
    """Return the 4x4 linearized state matrix for pendulum mass ``mp_hat``.

    Uses the same structure as the nominal ``A`` but substitutes ``mp_hat``
    for the pendulum mass; cart mass, gravity and pole length come from the
    module-level ``mc``, ``gravity`` and ``pole_length``.
    """
    A_hat = np.zeros((4, 4), dtype=np.float64)
    A_hat[0, 1] = 1.0
    A_hat[1, 2] = -(mp_hat / mc) * gravity
    A_hat[2, 3] = 1.0
    A_hat[3, 2] = ((mp_hat + mc) / (mc * pole_length)) * gravity
    return A_hat


# One state matrix per mass vertex, stacked along a new leading axis.
A_mass_vertices = np.stack([cartpole_A_mp(mass) for mass in mp_vertices])
vertex_count = A_mass_vertices.shape[0]


def repeat_at_vertices(matrix):
    """Copy a 2-D ``matrix`` once per polytope vertex.

    Returns an array of shape ``(vertex_count, rows, cols)`` for matrices
    that do not vary across vertices; ``vertex_count`` is the module-level
    number of vertices.
    """
    with_vertex_axis = matrix[np.newaxis, :, :]
    return np.repeat(with_vertex_axis, vertex_count, axis=0)


# Solver config for the polytopic problems; the dual feasibility tolerance
# (5e-3) is looser than the primal and gap tolerances (1e-3).
poly_config = SolverConfig()
poly_config.primal_feas_tol = 1e-3
poly_config.dual_feas_tol = 5e-3
poly_config.duality_gap_tol = 1e-3

# Only A varies with pendulum mass; the other matrices are repeated per vertex.
poly_feedback_result = cartpole.solve_h2_polytopic_state_feedback(
    A_mass_vertices,
    repeat_at_vertices(B_tot),
    repeat_at_vertices(C_z),
    repeat_at_vertices(D_z),
    config=poly_config,
)

print("Polytopic H2 feedback")
print_solver_diagnostics(poly_feedback_result)
K_h2_poly = np.asarray(poly_feedback_result.K)
print(f"K_h2_poly=\n{K_h2_poly}")

# Same vertex set for the observer problem.
poly_observer_result = cartpole.solve_h2_polytopic_state_observer(
    A_mass_vertices,
    repeat_at_vertices(B_w_obs),
    repeat_at_vertices(C_obs_h2),
    repeat_at_vertices(D_obs_h2),
    config=poly_config,
)
print("\nPolytopic H2 Observer")
print_solver_diagnostics(poly_observer_result)
L_h2_poly = np.asarray(poly_observer_result.L)
print(f"L_h2_poly=\n{L_h2_poly}")

# Evaluate the single polytopic feedback gain against every vertex plant.
poly_feedback_poles = [
    np.linalg.eigvals(A_vertex + B @ K_h2_poly) for A_vertex in A_mass_vertices
]

# Evaluate the single polytopic observer gain against every vertex plant.
poly_observer_poles = [
    np.linalg.eigvals(A_vertex - L_h2_poly @ C) for A_vertex in A_mass_vertices
]

for index, mass in enumerate(mp_vertices):
    print(f"\nm_p vertex {index}: {mass:.4f} kg")
    print(f"  feedback poles: {poly_feedback_poles[index]}")
    # Label typo fixed ("obeserver" -> "observer").
    print(f"  observer poles: {poly_observer_poles[index]}")

display(
    plot_poles(
        np.concatenate(poly_feedback_poles),
        title="Polytopic H2 Feedback Poles at Pendulum-Mass Vertices",
    )
)

display(
    plot_poles(
        np.concatenate(poly_observer_poles),
        title="Polytopic H2 Observer Poles at Pendulum-Mass Vertices",
    )
)

Polytopic H2 feedback
Akreon solver
  status: OPTIMAL
  event: NONE
  stalled: False
  iterations: 35
  mu: 1.150e-04
  primal objective: 6.438e+00
  dual objective: 6.447e+00
  residual norms:
    primal: 3.231e-04
    dual: 2.457e-04
    nu: 2.225e-308
K_h2_poly=
[[ 1.01611422  2.33311615 31.59087682  7.53988726]]

Polytopic H2 Observer
Akreon solver
  status: OPTIMAL
  event: NONE
  stalled: False
  iterations: 15
  mu: 4.191e-04
  primal objective: 1.513e+01
  dual objective: 1.510e+01
  residual norms:
    primal: 8.418e-05
    dual: 4.347e-03
    nu: 2.225e-308
L_h2_poly=
[[ 3.92321683 -0.97929097]
 [ 8.47447665 -6.49072034]
 [-1.0994947   9.88202435]
 [-8.2533579  50.16550486]]

m_p vertex 0: 0.0850 kg
  feedback poles: [-8.5617297 +0.j        -2.55926868+0.j        -0.81282999+0.4982149j
 -0.81282999-0.4982149j]
  obeserver poles: [-1.98828542+1.68423177j -1.98828542-1.68423177j -4.91433517+2.24534427j
 -4.91433517-2.24534427j]

m_p vertex 1: 0.1150 kg
  feedback poles: [-8.67824424+0.j         -2.35937098+0.j         -0.85452158+0.49241652j
 -0.85452158-0.49241652j]
  obeserver poles: [-1.98656854+1.70385962j -1.98656854-1.70385962j -4.91605205+2.09693187j
 -4.91605205-2.09693187j]

# Polytopic H2 compensator, built around the NOMINAL plant matrices:
#   x_hat_dot = (A + B K - L C) x_hat + L y,   u = K x_hat
A_h2_poly_compensator = A + B @ K_h2_poly - L_h2_poly @ C
h2_poly_compensator = ct.ss(
    A_h2_poly_compensator,
    L_h2_poly,
    K_h2_poly,
    np.zeros((K_h2_poly.shape[0], C.shape[0])),
)
h2_poly_compensator_poles = ct.poles(h2_poly_compensator)
# Discretize at the CartPole simulation step.
h2_poly_compensator_discrete = ct.c2d(h2_poly_compensator, CARTPOLE_DT)

# Check the plant + compensator interconnection (state [x; x_hat]) at each
# mass vertex; only the plant block changes between vertices.
poly_augmented_poles = []
for index, A_vertex in enumerate(A_mass_vertices):
    A_augmented = np.block(
        [
            [A_vertex, B @ K_h2_poly],
            [L_h2_poly @ C, A_h2_poly_compensator],
        ]
    )
    vertex_augmented_poles = np.linalg.eigvals(A_augmented)
    poly_augmented_poles.append(vertex_augmented_poles)
    print(
        f"m_p vertex {index} plant + compensator stable: "
        f"{np.all(np.real(vertex_augmented_poles) < 0.0)}"
    )

print(f"\nPolytopic H2 compensator poles: {h2_poly_compensator_poles}")
print(f"Polytopic H2 compensator:\n{h2_poly_compensator_discrete}")
display(
    plot_poles(
        h2_poly_compensator_poles,
        title="Polytopic H2 Dynamic Compensator Poles",
    )
)
display(
    plot_poles(
        np.concatenate(poly_augmented_poles),
        title="Polytopic H2 Plant + Compensator Poles at Pendulum-Mass Vertices",
    )
)

m_p vertex 0 plant + compensator stable: True
m_p vertex 1 plant + compensator stable: True

Polytopic H2 compensator poles: [-11.56422867+9.76434024j -11.56422867-9.76434024j
  -1.7117211 +1.31387282j  -1.7117211 -1.31387282j]
Polytopic H2 compensator:
<StateSpace>: sys[67]$sampled
Inputs (2): ['u[0]', 'u[1]']
Outputs (1): ['y[0]']
States (4): ['x[0]', 'x[1]', 'x[2]', 'x[3]']
dt = 0.02

A = [[ 9.23399919e-01  1.96394364e-02  2.30097531e-02  1.55134012e-03]
     [-1.31220637e-01  1.03961049e+00  5.63913715e-01  1.38636103e-01]
     [ 2.01092339e-02 -5.90865966e-04  8.06209825e-01  1.54654459e-02]
     [ 9.15965996e-02 -8.08670965e-02 -1.44705927e+00  7.19663548e-01]]

B = [[ 0.0767794  -0.01739998]
     [ 0.14906788  0.00171882]
     [-0.02045693  0.18662855]
     [-0.12725942  0.70833406]]

C = [[ 1.01611422  2.33311615 31.59087682  7.53988726]]

D = [[0. 0.]]

# (label, solver result) pairs for every LMI problem solved above.
shadow_price_results = [
    ("state feedback", state_feedback_result),
    ("state observer", state_observer_result),
    ("H2 feedback", h2_feedback_result),
    ("H2 observer", observer_result),
    ("polytopic H2 feedback", poly_feedback_result),
    ("polytopic H2 observer", poly_observer_result),
]

plot_shadow_price_summary(
    shadow_price_results, title="CartPole LMI Shadow Price Diagnostics"
)

def dynamic_compensator_specs():
    """Return the continuous-time dynamic compensators as a list of dicts.

    Each dict has a ``"label"`` and state-space matrices ``"A"``, ``"B"``,
    ``"C"``, ``"D"`` mapping the measurement to the force command. The LQG
    entry negates ``K_lqr`` because that gain follows ``u = -K x``; the H2
    gains are used with ``u = K x``.
    """

    def spec(label, state_matrix, observer_gain, output_gain):
        # D is all zeros: one row per command, one column per measurement.
        return {
            "label": label,
            "A": state_matrix,
            "B": observer_gain,
            "C": output_gain,
            "D": np.zeros((output_gain.shape[0], C.shape[0])),
        }

    return [
        spec("LQG", A - B @ K_lqr - L_lqe @ C, L_lqe, -K_lqr),
        spec("H2", A + B @ K_h2 - L_h2 @ C, L_h2, K_h2),
        spec("Polytopic H2", A_h2_poly_compensator, L_h2_poly, K_h2_poly),
    ]


def plant_plus_compensator_system(A_plant, B_plant, C_measure, controller):
    """Build the plant + compensator interconnection as one state-space system.

    Args:
        A_plant: Plant state matrix.
        B_plant: Plant input matrix; also used as the disturbance input.
        C_measure: Measurement matrix feeding the compensator.
        controller: Dict with compensator matrices under "A", "B", "C".

    Returns:
        A ``ct.ss`` system with state ``[x_plant; x_controller]``, a
        disturbance input entering through ``B_plant``, and the plant
        states as outputs.
    """
    plant_order = A_plant.shape[0]
    ctrl_order = controller["A"].shape[0]
    input_count = B_plant.shape[1]

    plant_row = np.hstack((A_plant, B_plant @ controller["C"]))
    ctrl_row = np.hstack((controller["B"] @ C_measure, controller["A"]))
    A_augmented = np.vstack((plant_row, ctrl_row))

    # The disturbance drives the plant states only.
    B_disturbance = np.vstack((B_plant, np.zeros((ctrl_order, input_count))))
    # Output the plant states and hide the compensator states.
    C_state = np.hstack((np.eye(plant_order), np.zeros((plant_order, ctrl_order))))
    D_state = np.zeros((plant_order, input_count))
    return ct.ss(A_augmented, B_disturbance, C_state, D_state)


def force_pulse(time, *, start=0.35, stop=0.75, amplitude=0.75):
    """Return a rectangular force pulse sampled at ``time``.

    Args:
        time: Scalar, sequence, or array of time stamps. Non-array inputs
            are converted with ``np.asarray``, so a plain float or list
            works as well as an ndarray.
        start: Pulse start time (inclusive).
        stop: Pulse stop time (inclusive).
        amplitude: Force value inside the pulse window.

    Returns:
        ``amplitude`` where ``start <= time <= stop`` and ``0.0`` elsewhere,
        with the same shape as ``time``.
    """
    time = np.asarray(time)
    inside_window = (time >= start) & (time <= stop)
    return amplitude * inside_window.astype(float)


def simulate_system(system, time, disturbance):
    """Simulate ``system`` driven by ``disturbance`` over ``time``.

    Returns the outputs from ``ct.forced_response`` as an array, transposed
    so that time runs along the first axis.
    """
    _time_out, outputs = ct.forced_response(system, T=time, U=disturbance)
    return np.transpose(np.asarray(outputs))


def discretize_controller(controller):
    """Discretize a compensator spec at the CartPole time step.

    Args:
        controller: Dict with continuous-time matrices under "A", "B", "C", "D".

    Returns:
        Tuple ``(A, B, C, D)`` of ndarrays for the system sampled at
        ``CARTPOLE_DT`` via ``ct.c2d``.
    """
    matrix_keys = ("A", "B", "C", "D")
    continuous = ct.ss(*(controller[key] for key in matrix_keys))
    sampled = ct.c2d(continuous, CARTPOLE_DT)
    return tuple(np.asarray(getattr(sampled, key)) for key in matrix_keys)


def configure_pendulum_mass(env, mass):
    """Set the pendulum mass on ``env`` and refresh the derived quantities.

    Updates ``masspole`` on the unwrapped environment, then recomputes
    ``total_mass`` (pole + cart) and ``polemass_length`` (pole mass * length)
    so they stay consistent with the new mass.
    """
    physics = env.unwrapped
    pole_mass = float(mass)
    physics.masspole = pole_mass
    physics.total_mass = pole_mass + physics.masscart
    physics.polemass_length = pole_mass * physics.length


def run_gym_rollout(
    controller,
    *,
    init_state,
    pendulum_mass=mp,
    disturbance=None,
    steps=250,
    seed=42,
    max_force=10.0,
    capture_frames=False,
):
    """Roll out a dynamic compensator on the Gymnasium CartPole-v1 environment.

    Args:
        controller: Dict with continuous-time compensator matrices under
            "A", "B", "C", "D"; it is discretized at ``CARTPOLE_DT``.
        init_state: Initial environment state, written directly into the
            environment after ``reset``.
        pendulum_mass: Pendulum mass to simulate (defaults to the nominal mass).
        disturbance: Optional callable ``disturbance(t) -> force`` added to
            the commanded force; ``None`` means no disturbance.
        steps: Maximum number of environment steps.
        seed: Seed passed to ``env.reset``.
        max_force: Symmetric saturation limit on the applied force.
        capture_frames: When true, render an RGB frame before every step.

    Returns:
        Dict with "states" (state before each step), "controls" (applied
        force per step) and "frames" (rendered frames, empty unless
        ``capture_frames`` is set).
    """
    render_mode = "rgb_array" if capture_frames else None
    env = gym.make("CartPole-v1", render_mode=render_mode)
    states, controls, frames = [], [], []

    # try/finally guarantees the environment is closed even if a step raises.
    try:
        configure_pendulum_mass(env, pendulum_mass)
        env.reset(seed=seed)
        # Overwrite the randomly sampled reset state with the requested one.
        env.unwrapped.state = np.asarray(init_state, dtype=np.float64).copy()

        A_ctrl, B_ctrl, C_ctrl, D_ctrl = discretize_controller(controller)
        ctrl_state = np.zeros(A_ctrl.shape[0], dtype=np.float64)

        for step in range(steps):
            state = np.asarray(env.unwrapped.state, dtype=np.float64)
            measurement = C @ state
            force_cmd = float((C_ctrl @ ctrl_state + D_ctrl @ measurement).squeeze())
            disturbance_force = (
                0.0 if disturbance is None else float(disturbance(step * CARTPOLE_DT))
            )
            applied_force = float(
                np.clip(force_cmd + disturbance_force, -max_force, max_force)
            )
            # CartPole only accepts a discrete action, so the continuous force
            # is emulated: its magnitude is written to force_mag and its sign
            # picks the action (presumably 1 pushes in the +x direction).
            env.unwrapped.force_mag = abs(applied_force)
            action = 1 if applied_force >= 0.0 else 0

            if capture_frames:
                # Fixed: the Gymnasium method is render(); env.rend() does not exist.
                frames.append(env.render())

            states.append(state.copy())
            controls.append(applied_force)

            _, _, terminated, truncated, _ = env.step(action)
            # Advance the compensator with the measurement taken before the step.
            ctrl_state = A_ctrl @ ctrl_state + B_ctrl @ measurement

            if terminated or truncated:
                break
    finally:
        env.close()

    return {
        "states": np.asarray(states),
        "controls": np.asarray(controls),
        "frames": frames,
    }

# Build every compensator and close the loop around the nominal linear plant.
dynamic_compensators = dynamic_compensator_specs()
dynamic_labels = [ctrl["label"] for ctrl in dynamic_compensators]
dynamic_closed_loop_systems = [
    plant_plus_compensator_system(A, B, C, ctrl) for ctrl in dynamic_compensators
]

# Step response of each closed loop to a disturbance at the plant input.
plot_state_step_responses(
    dynamic_closed_loop_systems,
    dynamic_labels,
    STATE_LABELS,
    T=np.linspace(0.0, 10.0, 600),
)

# Linear response to the force pulse, sampled at the CartPole time step.
trajectory_time = np.arange(0.0, 10.0, CARTPOLE_DT)
disturbance_profile = force_pulse(trajectory_time)
disturbed_linear_trajectories = [
    simulate_system(system, trajectory_time, disturbance_profile)
    for system in dynamic_closed_loop_systems
]
plot_state_trajectories(
    disturbed_linear_trajectories,
    dynamic_labels,
    STATE_LABELS,
    "Linear Dynamic Compensator State Trajectories with Force Pulse",
)

# Initial state for the gym rollouts: at rest, with a nonzero pendulum angle (0.08).
init_rollout_state = np.array([0.0, 0.0, 0.08, 0.0], dtype=np.float64)


def rollout_disturbance(time):
    """Return the force-pulse value (amplitude 0.75) at a single ``time``."""
    # force_pulse is called with an array, so wrap the scalar and unwrap the result.
    single_sample = np.asarray([time])
    pulse = force_pulse(single_sample, amplitude=0.75)
    return pulse[0]


# Nominal-mass rollouts: every compensator sees the same initial state and pulse.
disturbed_rollouts = []
for ctrl in dynamic_compensators:
    disturbed_rollouts.append(
        run_gym_rollout(
            ctrl, init_state=init_rollout_state, disturbance=rollout_disturbance
        )
    )
plot_rollout_states(
    disturbed_rollouts,
    dynamic_labels,
    STATE_LABELS,
    "Gymnasium Dynamic Compensator Rollouts with Force Pulse",
)

# Repeat the rollouts at each pendulum-mass vertex, one figure per mass.
mass_variation_rollouts, mass_variation_labels = [], []
for mass in mp_vertices:
    mass_rollouts = [
        run_gym_rollout(
            ctrl,
            init_state=init_rollout_state,
            pendulum_mass=mass,
            disturbance=rollout_disturbance,
        )
        for ctrl in dynamic_compensators
    ]
    mass_labels = [ctrl["label"] for ctrl in dynamic_compensators]
    mass_variation_rollouts.append(mass_rollouts)
    mass_variation_labels.append(mass_labels)
    display(
        plot_rollout_states(
            mass_rollouts,
            mass_labels,
            STATE_LABELS,
            f"Gymnasium Rollouts with Force Pulse | m_p={mass:.3f} kg",
        )
    )

From Dynamics to Feedback with Uncertainty

Part 1 - Equations of Motion

Part 2 - Linearization and State-Space Analysis

Cayley-Hamilton Controllability Matrix

Controllability Gramian

Part 3 - State Feedback via LMIs

LMI State Feedback

Closed Loop Analysis

Part 4 - Observer Feedback

Observability Matrix

Observability Gramian

LMI Luenberger Observer

Observer Error Dynamics Analysis

Observer State Feedback

Part 5 - Optimal Control Design and Analysis

LQR via CARE

Linear Quadratic Estimator (LQE)

Linear Quadratic Gaussian (LQG)

H2 Control via LMI and SDP

Disturbance Dynamics

H2 Observer Synthesis

H2 Dynamic Compensator

Plant + K(s)

Part 6 - Uncertainty Modeling with Polytopic LMIs

Shadow Price Summary

Simulation & Results

Forced Step Response Plot

Linear State Trajectory Plots

Gymnasium Rollout with Disturbance

Pendulum Mass Variation in Gymnasium Rollout