Nonlinear Curve Fitting with L-BFGS¶

Fit nonlinear parametric models to noisy data with np_minimize (L-BFGS). The symbolic-numeric bridge is central: we define the model symbolically, derive the sum-of-squares gradient with diff(), then implement that gradient numerically. Purely numeric environments must instead fall back on hand-derived or finite-difference gradients.

  1. Exponential decay $y = A e^{-kt} + c$ [S+N]
  2. Gaussian peak $y = A \exp\!\left(-\frac{(x-\mu)^2}{2\sigma^2}\right)$ [S+N]
  3. Linearized vs nonlinear comparison
  4. Confidence intervals via the Jacobian
In [1]:
load("numerics")$
load("numerics-optimize")$
load("ax-plots")$

1. Exponential Decay: $y = A e^{-kt} + c$ [S+N]¶

Symbolic Phase: Deriving the Gradient¶

The per-point squared-error loss is $$L_i(A, k, c) = \tfrac{1}{2}\bigl(y_i - A e^{-k t_i} - c\bigr)^2$$

We use diff() to derive partial derivatives with respect to each parameter.

In [2]:
/* Per-point model and loss */
model_i(A, k, c, t_i) := A * exp(-k * t_i) + c$
L_i(A, k, c, t_i, y_i) := (1/2) * (y_i - model_i(A, k, c, t_i))^2$

/* Symbolic partial derivatives */
dL_dA : diff(L_i(A, k, c, t[i], y[i]), A);
dL_dk : diff(L_i(A, k, c, t[i], y[i]), k);
dL_dc : diff(L_i(A, k, c, t[i], y[i]), c);
-(%e^-(t[i]*k)*(y[i]-c-%e^-(t[i]*k)*A))
%e^-(t[i]*k)*A*t[i]*(y[i]-c-%e^-(t[i]*k)*A)
-(y[i]-c-%e^-(t[i]*k)*A)

The residual $r_i = A e^{-k t_i} + c - y_i$ appears in all three:

  • $\frac{\partial L_i}{\partial A} = r_i \cdot e^{-k t_i}$
  • $\frac{\partial L_i}{\partial k} = -r_i \cdot A\, t_i\, e^{-k t_i}$
  • $\frac{\partial L_i}{\partial c} = r_i$

Summing over all data points gives the full gradient.
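The analytic gradient is easy to sanity-check numerically before trusting it inside an optimizer. The sketch below (Python/NumPy, outside this Maxima notebook; all names are illustrative) implements the three bullet-point derivatives, sums them over the data, and compares against central finite differences of the cost:

```python
import numpy as np

# Synthetic data mirroring the notebook: y = 5*exp(-0.3*t) + 1 + noise
rng = np.random.default_rng(0)
n = 50
t = np.linspace(0, 10, n)
y = 5.0 * np.exp(-0.3 * t) + 1.0 + 0.3 * rng.standard_normal(n)

def cost(p):
    A, k, c = p
    r = A * np.exp(-k * t) + c - y          # residual r_i
    return 0.5 * np.sum(r**2) / n

def grad(p):
    A, k, c = p
    e_kt = np.exp(-k * t)
    r = A * e_kt + c - y
    return np.array([
        np.sum(r * e_kt) / n,               # dL/dA  = mean(r_i * e^{-k t_i})
        -np.sum(r * A * t * e_kt) / n,      # dL/dk  = -mean(r_i * A t_i e^{-k t_i})
        np.sum(r) / n,                      # dL/dc  = mean(r_i)
    ])

# Central finite differences as an independent check
p0 = np.array([1.0, 1.0, 0.0])
h = 1e-6
fd = np.array([(cost(p0 + h * e) - cost(p0 - h * e)) / (2 * h)
               for e in np.eye(3)])
assert np.allclose(grad(p0), fd, rtol=1e-4, atol=1e-6)
```

If the analytic and finite-difference gradients disagree, the sign or scaling of one of the terms above is wrong; this check catches such slips before they surface as mysterious L-BFGS stalls.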

Numeric Phase: Generate Data and Optimize¶

In [3]:
/* True parameters: A=5, k=0.3, c=1 */
n : 50$
t_data : np_linspace(0, 10, n)$
noise : np_scale(0.3, np_randn([n]))$
y_data : np_add(
  np_add(np_scale(5.0, np_exp(np_scale(-0.3, t_data))), 1.0),
  noise)$
print("Generated", n, "noisy observations")$
print("True: A=5, k=0.3, c=1")$
Generated 50 noisy observations
True: A=5, k=0.3, c=1
In [4]:
/* Cost and gradient — implementing the symbolic derivatives above */
exp_cost(params) := block([A_val, k_val, c_val, pred, res],
  A_val : np_ref(params, 0),
  k_val : np_ref(params, 1),
  c_val : np_ref(params, 2),
  pred : np_add(np_scale(A_val, np_exp(np_scale(-k_val, t_data))), c_val),
  res : np_sub(pred, y_data),
  np_sum(np_pow(res, 2)) / (2 * n))$

exp_grad(params) := block([A_val, k_val, c_val, e_kt, pred, res],
  A_val : np_ref(params, 0),
  k_val : np_ref(params, 1),
  c_val : np_ref(params, 2),
  e_kt : np_exp(np_scale(-k_val, t_data)),
  pred : np_add(np_scale(A_val, e_kt), c_val),
  res : np_sub(pred, y_data),
  /* Gradient from symbolic derivation */
  ndarray([
    np_sum(np_mul(res, e_kt)) / n,
    -np_sum(np_mul(np_mul(res, np_scale(A_val, t_data)), e_kt)) / n,
    np_sum(res) / n
  ]))$
In [5]:
/* Initial guess (deliberately off) */
p0 : ndarray([1.0, 1.0, 0.0])$
[p_opt, loss_opt, converged] : np_minimize(exp_cost, exp_grad, p0)$

A_fit : np_ref(p_opt, 0)$
k_fit : np_ref(p_opt, 1)$
c_fit : np_ref(p_opt, 2)$
print("Fitted: A =", A_fit, ", k =", k_fit, ", c =", c_fit)$
print("True:   A = 5, k = 0.3, c = 1")$
print("Loss:", loss_opt, " Converged:", converged)$
*************************************************
  N=    3   NUMBER OF CORRECTIONS= 3
       INITIAL VALUES
 F=  3.723107552983336D+00   GNORM=  2.531628224447235D+00
*************************************************
   I  NFN     FUNC                    GNORM                   STEPLENGTH
   1    2     1.687071541559957D+00   1.549320579649373D+00   3.950027062991620D-01
  26   32     6.338349441517671D-02   4.299280944043438D-11   1.000000000000000D+00
 THE MINIMIZATION TERMINATED WITHOUT DETECTING ERRORS.
 IFLAG = 0
Fitted: A = 4.953573796500634 , k = 0.31166915597636485 , c = 1.0286091924468495
True:   A = 5, k = 0.3, c = 1
Loss: 0.06338349441517671  Converged: true
In [6]:
ax_draw2d(
  color="red", marker_size=5, name="Noisy data",
  points(np_to_list(t_data), np_to_list(y_data)),
  color="blue", line_width=2, name="Fitted: A*exp(-k*t)+c",
  explicit(A_fit * exp(-k_fit * t) + c_fit, t, 0, 10),
  color="gray", dash="dash", name="True curve",
  explicit(5 * exp(-0.3*t) + 1, t, 0, 10),
  title="Exponential Decay Fit", xlabel="t", ylabel="y",
  grid=true, showlegend=true
)$
[Figure: Exponential Decay Fit — noisy data, fitted curve, and true curve]
In [7]:
/* Residual plot */
pred_fit : np_add(np_scale(A_fit, np_exp(np_scale(-k_fit, t_data))), c_fit)$
residuals : np_sub(y_data, pred_fit)$
ax_draw2d(
  color="blue", marker_size=4,
  points(np_to_list(t_data), np_to_list(residuals)),
  color="black", dash="dash", explicit(0, t, 0, 10),
  title="Residuals", xlabel="t", ylabel="y - y_hat",
  grid=true
)$
[Figure: Residuals vs t]

2. Gaussian Peak Fitting [S+N]¶

Fitting a Gaussian profile $y = A \exp\!\left(-\frac{(x - \mu)^2}{2\sigma^2}\right)$ to spectral or chromatographic data. Again, derive the gradient symbolically.

In [8]:
/* Symbolic Gaussian model and loss */
gauss_i(A, mu, sigma, x_i) := A * exp(-(x_i - mu)^2 / (2 * sigma^2))$
L_gauss(A, mu, sigma, x_i, y_i) := (1/2) * (y_i - gauss_i(A, mu, sigma, x_i))^2$

dG_dA : ratsimp(diff(L_gauss(A, mu, sigma, x[i], y[i]), A));
dG_dmu : ratsimp(diff(L_gauss(A, mu, sigma, x[i], y[i]), mu));
dG_dsigma : ratsimp(diff(L_gauss(A, mu, sigma, x[i], y[i]), sigma));
The raw ratsimp output is unwieldy; factored in terms of the model value $g_i = A \exp\!\left(-\frac{(x_i-\mu)^2}{2\sigma^2}\right)$ and residual $r_i = g_i - y_i$, the three derivatives are

  • $\frac{\partial L_i}{\partial A} = r_i \, e^{-(x_i-\mu)^2/(2\sigma^2)}$
  • $\frac{\partial L_i}{\partial \mu} = r_i \, g_i \, \frac{x_i-\mu}{\sigma^2}$
  • $\frac{\partial L_i}{\partial \sigma} = r_i \, g_i \, \frac{(x_i-\mu)^2}{\sigma^3}$

These are exactly the terms implemented in gauss_grad below.
In [9]:
/* True: A=3, mu=5, sigma=1.2 */
n_g : 60$
x_g : np_linspace(0, 10, n_g)$
y_g : np_add(
  np_scale(3.0, np_exp(np_neg(np_div(np_pow(np_sub(x_g, 5.0), 2), 2*1.2^2)))),
  np_scale(0.15, np_randn([n_g])))$

gauss_cost(p) := block([A_v, mu_v, sig_v, d, pred, res],
  A_v : np_ref(p, 0), mu_v : np_ref(p, 1), sig_v : np_ref(p, 2),
  d : np_sub(x_g, mu_v),
  pred : np_scale(A_v, np_exp(np_neg(np_div(np_pow(d, 2), 2*sig_v^2)))),
  res : np_sub(pred, y_g),
  np_sum(np_pow(res, 2)) / (2 * n_g))$

gauss_grad(p) := block([A_v, mu_v, sig_v, d, e_term, pred, res],
  A_v : np_ref(p, 0), mu_v : np_ref(p, 1), sig_v : np_ref(p, 2),
  d : np_sub(x_g, mu_v),
  e_term : np_exp(np_neg(np_div(np_pow(d, 2), 2*sig_v^2))),
  pred : np_scale(A_v, e_term),
  res : np_sub(pred, y_g),
  ndarray([
    np_sum(np_mul(res, e_term)) / n_g,
    np_sum(np_mul(np_mul(res, pred), d)) / (n_g * sig_v^2),
    np_sum(np_mul(np_mul(res, pred), np_pow(d, 2))) / (n_g * sig_v^3)
  ]))$

[p_g, loss_g, conv_g] : np_minimize(gauss_cost, gauss_grad, ndarray([2.0, 4.0, 2.0]))$
print("Fitted: A =", np_ref(p_g, 0), ", mu =", np_ref(p_g, 1),
      ", sigma =", np_ref(p_g, 2))$
print("True:   A = 3, mu = 5, sigma = 1.2")$
*************************************************
  N=    3   NUMBER OF CORRECTIONS= 3
       INITIAL VALUES
 F=  2.795624596558920D-01   GNORM=  2.700083695494366D-01
*************************************************
   I  NFN     FUNC                    GNORM                   STEPLENGTH
   1    2     1.105509895993855D-01   1.573322017045931D-01   3.703588898628222D+00
  13   15     9.422064231565684D-03   1.818274464655374D-08   1.000000000000000D+00
 THE MINIMIZATION TERMINATED WITHOUT DETECTING ERRORS.
 IFLAG = 0
Fitted: A = 3.064383725198653 , mu = 5.013261450896073 , sigma = 1.164933535523506
True:   A = 3, mu = 5, sigma = 1.2
In [10]:
A_gf : np_ref(p_g, 0)$  mu_gf : np_ref(p_g, 1)$  sig_gf : np_ref(p_g, 2)$
ax_draw2d(
  color="red", marker_size=5, name="Data",
  points(np_to_list(x_g), np_to_list(y_g)),
  color="blue", line_width=2, name="Fitted Gaussian",
  explicit(A_gf * exp(-(t - mu_gf)^2 / (2*sig_gf^2)), t, 0, 10),
  color="gray", dash="dash", name="True",
  explicit(3 * exp(-(t - 5)^2 / (2*1.2^2)), t, 0, 10),
  title="Gaussian Peak Fit", xlabel="x", ylabel="y",
  grid=true, showlegend=true
)$
[Figure: Gaussian Peak Fit — data, fitted Gaussian, and true curve]

3. Linearized Least Squares — A Comparison¶

For exponential decay $y = A e^{-kt}$ (without offset $c$), taking the log yields $$\ln y = \ln A - k t$$ which is linear in $(\ln A, k)$. We can use np_lstsq on the transformed data and compare with the nonlinear fit.
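The same transform can be sketched in Python/NumPy (illustrative names, not part of this notebook) to see both the mechanics and the catch: additive noise on $y$ becomes heteroscedastic noise on $\ln y$, so the linearized estimates are generally biased.

```python
import numpy as np

# Hypothetical NumPy mirror of the log-linearized fit
rng = np.random.default_rng(3)
n = 50
t = np.linspace(0, 10, n)
y = 5.0 * np.exp(-0.3 * t) + 0.2 * rng.standard_normal(n)

# ln(y) = ln(A) - k*t  =>  ordinary least squares on design matrix [1, t]
y_pos = np.clip(y, 0.01, None)              # guard: log needs positive values
X = np.column_stack([np.ones(n), t])
coef, *_ = np.linalg.lstsq(X, np.log(y_pos), rcond=None)
A_lin, k_lin = np.exp(coef[0]), -coef[1]

# Additive noise on y becomes multiplicative on the log scale, so points
# with small y (large t) are effectively over-weighted; expect a biased k.
```

This is why the comparison below typically shows the nonlinear L-BFGS fit landing closer to the true parameters than the linearized one.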

In [11]:
/* Generate data without offset: y = 5*exp(-0.3*t) + noise */
y_nooff : np_add(np_scale(5.0, np_exp(np_scale(-0.3, t_data))),
               np_scale(0.2, np_randn([n])))$

/* Log-transform (clip to avoid log of negatives) */
y_log : np_log(np_clip(y_nooff, 0.01, 100.0))$

/* Design matrix [1, t] for linear fit of ln(y) = ln(A) - k*t */
X_lin : np_hstack(np_ones([n, 1]), np_reshape(t_data, [n, 1]))$
[coeffs, _, _, _] : np_lstsq(X_lin, np_reshape(y_log, [n, 1]))$
A_lin : exp(np_ref(coeffs, 0, 0))$
k_lin : -np_ref(coeffs, 1, 0)$
print("Linearized: A =", A_lin, ", k =", k_lin)$

/* Nonlinear fit for comparison */
exp2_cost(p) := block([A_v, k_v, pred, res],
  A_v : np_ref(p, 0), k_v : np_ref(p, 1),
  pred : np_scale(A_v, np_exp(np_scale(-k_v, t_data))),
  res : np_sub(pred, y_nooff),
  np_sum(np_pow(res, 2)) / (2*n))$

exp2_grad(p) := block([A_v, k_v, e_kt, pred, res],
  A_v : np_ref(p, 0), k_v : np_ref(p, 1),
  e_kt : np_exp(np_scale(-k_v, t_data)),
  pred : np_scale(A_v, e_kt),
  res : np_sub(pred, y_nooff),
  ndarray([np_sum(np_mul(res, e_kt))/n,
           -np_sum(np_mul(np_mul(res, np_scale(A_v, t_data)), e_kt))/n]))$

[p2, _, _] : np_minimize(exp2_cost, exp2_grad, ndarray([1.0, 1.0]))$
print("Nonlinear:  A =", np_ref(p2, 0), ", k =", np_ref(p2, 1))$
print("True:       A = 5, k = 0.3")$
Linearized: A = 5.9462364459363695 , k = 0.36605364812105573
*************************************************
  N=    2   NUMBER OF CORRECTIONS= 2
       INITIAL VALUES
 F=  1.741349948391961D+00   GNORM=  4.483661850900379D-01
*************************************************
   I  NFN     FUNC                    GNORM                   STEPLENGTH
   1    7     4.465061362064093D-01   9.227740354532664D-01   3.424831070817150D+00
  13   22     1.892941449113934D-02   2.139353700853245D-09   1.000000000000000D+00
 THE MINIMIZATION TERMINATED WITHOUT DETECTING ERRORS.
 IFLAG = 0
Nonlinear:  A = 4.966060946751338 , k = 0.30271955688451435
True:       A = 5, k = 0.3
In [12]:
ax_draw2d(
  color="gray", marker_size=4, name="Data",
  points(np_to_list(t_data), np_to_list(y_nooff)),
  color="orange", line_width=2, name="Linearized fit",
  explicit(A_lin * exp(-k_lin * t), t, 0, 10),
  color="blue", line_width=2, name="Nonlinear (L-BFGS) fit",
  explicit(np_ref(p2, 0) * exp(-np_ref(p2, 1) * t), t, 0, 10),
  color="black", dash="dash", name="True",
  explicit(5 * exp(-0.3*t), t, 0, 10),
  title="Linearized vs Nonlinear Fit",
  xlabel="t", ylabel="y", grid=true, showlegend=true
)$
[Figure: Linearized vs Nonlinear Fit — data, linearized fit, L-BFGS fit, and true curve]

4. Confidence Intervals via the Jacobian¶

At the optimum, the covariance of the parameter estimates is approximately $$\text{Cov}(\hat{\theta}) \approx \hat{\sigma}^2 (J^T J)^{-1}$$ where $J$ is the Jacobian of the model predictions w.r.t. parameters and $\hat{\sigma}^2$ is the residual variance. We build $J$ from the symbolic partial derivatives computed in Section 1.

In [13]:
/* Jacobian columns from symbolic derivatives:
   J[i,0] = d(model)/dA = exp(-k*t_i)
   J[i,1] = d(model)/dk = -A*t_i*exp(-k*t_i)
   J[i,2] = d(model)/dc = 1                  */
e_fit : np_exp(np_scale(-k_fit, t_data))$
J_col1 : np_reshape(e_fit, [n, 1])$
J_col2 : np_reshape(np_scale(-A_fit, np_mul(t_data, e_fit)), [n, 1])$
J_col3 : np_ones([n, 1])$
J : np_hstack(J_col1, J_col2, J_col3)$

/* Approximate covariance: sigma^2 * (J^T J)^-1 */
sigma2_hat : 2 * loss_opt * n / (n - 3)$  /* residual variance, dof = n-p */
JtJ : np_matmul(np_transpose(J), J)$
cov_params : np_scale(sigma2_hat, np_inv(JtJ))$

print("Approximate standard errors:")$
print("  se(A) =", sqrt(np_ref(cov_params, 0, 0)))$
print("  se(k) =", sqrt(np_ref(cov_params, 1, 1)))$
print("  se(c) =", sqrt(np_ref(cov_params, 2, 2)))$
Approximate standard errors:
  se(A) = 0.19725471926727575
  se(k) = 0.03695938326522652
  se(c) = 0.1782944850415087
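As an outside cross-check, the same $\hat{\sigma}^2 (J^T J)^{-1}$ computation can be reproduced in Python/NumPy. This sketch hard-codes the fitted values printed above (an approximation, since the notebook's full-precision values differ slightly) and extends the standard errors to approximate 95% intervals:

```python
import numpy as np

# Inputs hard-coded (rounded) from the notebook's exponential fit above
n = 50
t = np.linspace(0, 10, n)
A_fit, k_fit, c_fit = 4.9536, 0.3117, 1.0286
loss_opt = 0.063383                         # final L-BFGS objective

# Jacobian of model predictions w.r.t. (A, k, c), one row per data point
e_kt = np.exp(-k_fit * t)
J = np.column_stack([e_kt, -A_fit * t * e_kt, np.ones(n)])

sigma2_hat = 2 * loss_opt * n / (n - 3)     # residual variance, dof = n - p
cov = sigma2_hat * np.linalg.inv(J.T @ J)
se = np.sqrt(np.diag(cov))                  # standard errors of (A, k, c)

# Approximate 95% confidence intervals: estimate ± 1.96 * se
theta = np.array([A_fit, k_fit, c_fit])
ci95 = np.column_stack([theta - 1.96 * se, theta + 1.96 * se])
```

The resulting standard errors agree with the Maxima values printed above, and the intervals comfortably cover the true parameters $(5, 0.3, 1)$.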

Summary¶

| Model | Parameters | Method | S+N Bridge |
|---|---|---|---|
| Exponential decay | $A$, $k$, $c$ | np_minimize | diff() $\to$ gradient |
| Gaussian peak | $A$, $\mu$, $\sigma$ | np_minimize | diff() $\to$ gradient |
| Exponential (no offset) | $A$, $k$ | np_lstsq + np_minimize | Compare approaches |

The symbolic gradient derivation ensures we have exact gradients for L-BFGS, leading to fast and reliable convergence. The Jacobian-based confidence intervals reuse the same symbolic derivatives.
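For readers who want a reference point outside Maxima, the whole workflow of sections 1 and 4 has a one-call analogue in SciPy (a sketch, not part of this notebook): scipy.optimize.curve_fit returns both the parameter estimates and the Jacobian-based covariance matrix, using internally computed derivatives.

```python
import numpy as np
from scipy.optimize import curve_fit

# Synthetic data mirroring section 1 (illustrative, independent noise draw)
rng = np.random.default_rng(0)
n = 50
t = np.linspace(0, 10, n)
y = 5.0 * np.exp(-0.3 * t) + 1.0 + 0.3 * rng.standard_normal(n)

def model(t, A, k, c):
    return A * np.exp(-k * t) + c

# Fit and covariance in one call; p0 matches the notebook's initial guess
p_hat, p_cov = curve_fit(model, t, y, p0=[1.0, 1.0, 0.0])
se = np.sqrt(np.diag(p_cov))                # standard errors of (A, k, c)
```

The estimates and standard errors should land close to the notebook's values; the added value of the symbolic route shown above is that the gradient and Jacobian are exact expressions you can inspect, not black-box approximations.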