# A Coding Guide Implementing SHAP Explainability Workflows with Explainer Comparisons, Maskers, Interactions, Drift, and Black-Box Models
print("\n" + "="*72)
print("PART 3: Interaction decomposition")
print("="*72)

# Pairwise SHAP interaction values on the first 500 test rows.
# NOTE(review): assumes `inter` comes back as (n_samples, n_features,
# n_features), i.e. a single-output tree model — confirm upstream.
inter = tree_expl.shap_interaction_values(X_te.iloc[:500])
inter_abs = np.abs(inter).mean(0)      # mean |φ_ij| over samples
diag = inter_abs.diagonal().copy()     # main effects sit on the diagonal
off = inter_abs.copy()
np.fill_diagonal(off, 0)               # keep only off-diagonal (interaction) mass
main_share = diag.sum() / (diag.sum() + off.sum())
print(f"Total attribution mass: {main_share*100:.1f}% main effects, "
      f"{(1-main_share)*100:.1f}% interactions")

# Rank every unique (i < j) feature pair by mean interaction strength.
pairs = []
for i in range(X.shape[1]):
    for j in range(i + 1, X.shape[1]):
        pairs.append((X.columns[i], X.columns[j], off[i, j]))
pairs.sort(key=lambda t: t[2], reverse=True)
print("\nTop 5 interaction pairs (mean |φ_ij|):")
for a, b, v in pairs[:5]:
    print(f" {a:10s} × {b:10s} → {v:.4f}")

# Heatmap of the off-diagonal interaction matrix.
fig, ax = plt.subplots(figsize=(7.5, 6))
im = ax.imshow(off, cmap="viridis")
ax.set_xticks(range(X.shape[1]))
ax.set_xticklabels(X.columns, rotation=45, ha="right")
ax.set_yticks(range(X.shape[1]))
ax.set_yticklabels(X.columns)
plt.colorbar(im, label="mean |φ_ij|")
plt.title("Pairwise interaction strength")
plt.tight_layout()
plt.show()

# Dependence view of the strongest pair: main effect φ_ii next to the
# interaction term 2·φ_ij (off-diagonal counted twice by symmetry).
a, b, _ = pairs[0]
i, j = X.columns.get_loc(a), X.columns.get_loc(b)
xs = X_te.iloc[:500][a].values
cs = X_te.iloc[:500][b].values
fig, axes = plt.subplots(1, 2, figsize=(13, 4), sharex=True)
axes[0].scatter(xs, inter[:, i, i], c=cs, s=12, cmap="coolwarm")
axes[0].set_title(f"Main effect of {a}")
axes[0].set_xlabel(a)
axes[0].set_ylabel("φ_{ii}")
sc = axes[1].scatter(xs, 2*inter[:, i, j], c=cs, s=12, cmap="coolwarm")
axes[1].set_title(f"Interaction {a} × {b}")
axes[1].set_xlabel(a)
axes[1].set_ylabel("2·φ_{ij}")
plt.colorbar(sc, ax=axes[1], label=b)
plt.tight_layout()
plt.show()
print("\n" + "="*72)
print("PART 4: Link functions — logit vs probability space")
print("="*72)

# Binary classification dataset for the link-function comparison.
cancer = load_breast_cancer()
Xc = pd.DataFrame(cancer.data, columns=cancer.feature_names)
yc = pd.Series(cancer.target)

# FIX: Xc_tr / Xc_te / yc_tr / yc_te were used below but never defined
# (NameError at runtime) — the train/test split was missing. Stratify on the
# label and reuse the script's random_state=42 convention.
from sklearn.model_selection import train_test_split
Xc_tr, Xc_te, yc_tr, yc_te = train_test_split(
    Xc, yc, test_size=0.25, stratify=yc, random_state=42)

clf = xgb.XGBClassifier(n_estimators=300, max_depth=4, learning_rate=0.05,
                        eval_metric="logloss", random_state=42).fit(Xc_tr, yc_tr)
print(f"AUC = {roc_auc_score(yc_te, clf.predict_proba(Xc_te)[:,1]):.3f}")

# Default TreeExplainer explains the model's raw margin (log-odds) output.
expl_logit = shap.TreeExplainer(clf)
sv_logit = expl_logit(Xc_te)

# Passing a background sample plus model_output="probability" yields SHAP
# values expressed directly in probability space.
expl_prob = shap.TreeExplainer(clf, Xc_tr.sample(100, random_state=42),
                               model_output="probability")
sv_prob = expl_prob(Xc_te)

# Local-accuracy check: base value + Σφ should reproduce the model output.
print("\nSample 0 reconstruction (φ should sum to f - E[f]):")
print(f" log-odds : base + Σφ = {sv_logit.base_values[0] + sv_logit.values[0].sum():+.3f}")
print(f" prob : base + Σφ = {sv_prob.base_values[0] + sv_prob.values[0].sum():.3f} "
      f"(model proba = {clf.predict_proba(Xc_te.iloc[[0]])[0,1]:.3f})")

# Side-by-side waterfall plots of the same sample in both output spaces.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
plt.sca(axes[0])
shap.plots.waterfall(sv_logit[0], max_display=8, show=False)
axes[0].set_title("Log-odds space")
plt.sca(axes[1])
shap.plots.waterfall(sv_prob[0], max_display=8, show=False)
axes[1].set_title("Probability space")
plt.tight_layout()
plt.show()


