load("numerics")$
load("ax-plots")$

/* Sample size shared by every generated vector. */
n : 200$

/* Draw the three independent standard-normal sources with a single
   parallel assignment (one np_randn call per source, each of length n). */
[z1, z2, z3] : [np_randn([n]), np_randn([n]), np_randn([n])]$

/* Pass-through variables: x1 and x3 are the raw sources z1 and z3. */
x1 : z1$
x3 : z3$

/* Mixed variables: linear combinations of the sources give a known
   correlation structure.
     x2 =  0.8*z1 + 0.6*z2    -> shares z1 with x1 (positive correlation)
     x4 = -0.7*z1 + 0.714*z3  -> opposes x1 through z1, follows x3 through z3
   In both cases the squared weights sum to (approximately) 1, so the
   mixed variables keep roughly unit variance. */
x2 : np_add(
  np_scale(0.8, z1),
  np_scale(0.6, z2)
)$
x4 : np_add(
  np_scale(-0.7, z1),
  np_scale(0.714, z3)
)$

print("Generated", n, "observations of 4 variables")$

Generated 200 observations of 4 variables

/* Assemble the n-by-4 data matrix, one variable per column.
   Each 1D vector is first reshaped to an n-by-1 column; the columns are
   then joined pairwise (x1|x2 and x3|x4) and the two pairs joined again.
   `cols` is local to the block, so no helper name leaks into the session. */
data : block([cols],
  cols : map(lambda([v], np_reshape(v, [n, 1])), [x1, x2, x3, x4]),
  np_hstack(
    np_hstack(cols[1], cols[2]),
    np_hstack(cols[3], cols[4])
  )
)$
print("Data shape:", np_shape(data))$

/* Correlation matrix of the data, kept in R for the heatmap below and
   shown here as an ordinary Maxima matrix. */
R : np_corrcoef(data)$
print("Correlation matrix:")$
np_to_matrix(R);

Data shape: [200,4]
Correlation matrix:

/* Heatmap of the correlation matrix R.
   The same variable names label both axes, so the list is built once in a
   block-local and passed twice. */
block([var_names : ["x1", "x2", "x3", "x4"]],
  ax_draw2d(
    ax_heatmap(np_to_matrix(R), var_names, var_names),
    colorscale="RdBu",
    title="Pearson Correlation Matrix"
  )
)$

/* Most correlated pair: x1 vs x2.
   x2 = 0.8*z1 + 0.6*z2 and x1 = z1, so the cloud should slope upward.
   NOTE(review): marker_size/opacity are placed before points(...) --
   presumably they style the object that follows, as in Maxima's draw
   package; confirm against the ax-plots documentation before reordering. */
ax_draw2d(
  marker_size=4, opacity=0.6,
  points(x1, x2),
  title="x1 vs x2 (positive correlation)",
  xlabel="x1", ylabel="x2",
  aspect_ratio=true
)$

/* Anti-correlated pair: x1 vs x4.
   x4 = -0.7*z1 + 0.714*z3 and x1 = z1, so the cloud should slope downward.
   Drawn in red to distinguish it from the other scatter plots.
   NOTE(review): color/marker_size/opacity are placed before points(...) --
   presumably they style the object that follows, as in Maxima's draw
   package; confirm against the ax-plots documentation before reordering. */
ax_draw2d(
  color="red", marker_size=4, opacity=0.6,
  points(x1, x4),
  title="x1 vs x4 (negative correlation)",
  xlabel="x1", ylabel="x4",
  aspect_ratio=true
)$

/* Uncorrelated pair: x1 vs x3.
   x1 = z1 and x3 = z3 come from separate random draws, so the cloud should
   show no linear trend (the sample correlation is near, not exactly, zero).
   Drawn in green to distinguish it from the other scatter plots.
   NOTE(review): color/marker_size/opacity are placed before points(...) --
   presumably they style the object that follows, as in Maxima's draw
   package; confirm against the ax-plots documentation before reordering. */
ax_draw2d(
  color="green", marker_size=4, opacity=0.6,
  points(x1, x3),
  title="x1 vs x3 (uncorrelated)",
  xlabel="x1", ylabel="x3",
  aspect_ratio=true
)$

/* Covariance matrix: its entries carry the scale of the variables, so the
   magnitudes are not directly comparable across differently scaled data. */
print("Covariance matrix:")$
block([cov_nd : np_cov(data)],
  np_to_matrix(cov_nd)
);

/* Correlation matrix: the same information as the covariance, rescaled so
   every entry lies in [-1, 1]. */
print("Correlation matrix (normalized to [-1, 1]):")$
block([corr_nd : np_corrcoef(data)],
  np_to_matrix(corr_nd)
);

Covariance matrix:
matrix([1.0980767688991973,0.9228626572630175,-0.023322556823519025,
        -0.7853060438014303],
       [0.9228626572630175,1.1342293018123484,-0.057796496231947714,
        -0.6872705583937225],
       [-0.023322556823519025,-0.057796496231947714,1.045244577214172,
        0.762630417907382],
       [-0.7853060438014303,-0.6872705583937225,0.762630417907382,
        1.094232349046872])
Correlation matrix (normalized to [-1, 1]):

Variable	Construction	Expected correlations
$x_1$	$z_1$	baseline
$x_2$	$0.8\,z_1 + 0.6\,z_2$	positively correlated with $x_1$
$x_3$	$z_3$	independent of $x_1$ and $x_2$
$x_4$	$-0.7\,z_1 + 0.714\,z_3$	anti-correlated with $x_1$ ($\approx -0.7$), positively correlated with $x_3$ ($\approx 0.71$)

Correlation Heatmap

Generating Correlated Data

Visualizing the Correlation Matrix

Interpreting Correlations

Covariance vs Correlation

Summary