# Figure-wide palette: BG is used for figure/axes backgrounds, DARK for
# titles and reference lines.
BG   = "#fafaf8"
DARK = "#1a1a1a"

# Colour ramp: blue for common tokens, red for rare
TOKEN_COLORS = ("#1a5276", "#2471a3", "#5dade2", "#e67e22", "#c0392b", "#7d2a2a")

# Shared x-axis for the trajectory panels: 0 .. N_STEPS-1.
steps = np.arange(N_STEPS)

# Top-level figure that hosts all four panels.
fig = plt.figure(figsize=(16, 11), facecolor=BG)
fig.suptitle(
    "SGD vs. Adam on Uncommon Tokens -- Frequency Bias and Variance Normalization",
    fontsize=14, fontweight="bold", color=DARK, y=0.99,
)

# 2 x 3 layout bound to `fig`; the panels below index into this grid.
gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.45, wspace=0.35)

# ── 1. SGD weight trajectories ────────────────────────────────
# Top row, first two grid columns.
ax1 = fig.add_subplot(gs[0, :2])
ax1.set_facecolor(BG)
# Dashed reference line at the target weight.
ax1.axhline(1.0, color=DARK, lw=1, ls="--", alpha=0.3, label="True weight = 1.0")

# One line per token: column i of sgd_history plotted against the step index.
# NOTE(review): assumes sgd_history is (N_STEPS, n_tokens) and FREQ is
# indexable per token -- confirm against the code that builds them.
for i, (token, color) in enumerate(zip(TOKENS, TOKEN_COLORS)):
    ax1.plot(steps, sgd_history[:, i], color=color, lw=1.8,
             label=f"{token} (freq={FREQ[i]:.3f})")

ax1.set_title("SGD -- Weight Trajectories\nRare tokens barely transfer from zero", fontsize=11, color=DARK)
ax1.set_xlabel("Coaching Step", fontsize=9)
ax1.set_ylabel("Realized Weight", fontsize=9)
ax1.legend(fontsize=8, loc="right")
ax1.set_ylim(-0.3, 1.6)
ax1.spines[["top", "right"]].set_visible(False)

# Annotate failure zone
# The arrow targets the last recorded value of column 5, i.e. the sixth
# series (presumably the rarest token, per the colour ramp -- verify).
ax1.annotate(
    "Uncommon tokens stuck\nnear zero",
    xy=(N_STEPS * 0.95, sgd_history[-1, 5]),
    xytext=(N_STEPS * 0.65, -0.15),
    fontsize=8.5, color="#c0392b",
    arrowprops=dict(arrowstyle="->", color="#c0392b", lw=1.2),
    bbox=dict(boxstyle="round,pad=0.3", facecolor="#fff0f0", edgecolor="#c0392b", alpha=0.85),
)

# ── 2. Final weight error bar chart ───────────────────────────
# Top row, third grid column.
ax2 = fig.add_subplot(gs[0, 2])
ax2.set_facecolor(BG)

# One bar group per token; sized from TOKENS so the tick positions always
# match the tick labels set below instead of relying on a hard-coded 6.
x      = np.arange(len(TOKENS))
w_sgd  = sgd_final
w_adam = adam_final
width  = 0.35

# Side-by-side bars of absolute error |w - TRUE_W| for each optimizer,
# shifted half a bar width left/right of the tick position.
bars_sgd  = ax2.bar(x - width/2, np.abs(w_sgd  - TRUE_W), width, color="#c0392b", alpha=0.85, label="SGD error")
bars_adam = ax2.bar(x + width/2, np.abs(w_adam - TRUE_W), width, color="#2980b9", alpha=0.85, label="Adam error")

ax2.set_xticks(x)
# Token names are truncated to 8 characters to keep the rotated labels short.
ax2.set_xticklabels([t[:8] for t in TOKENS], rotation=30, ha="right", fontsize=8)
ax2.set_ylabel("|discovered w − true w|", fontsize=9)
# Bars show an error magnitude, so smaller bars are the better result.
ax2.set_title("Ultimate Weight Error\n(lower = better)", fontsize=11, color=DARK)
ax2.legend(fontsize=8)
ax2.spines[["top", "right"]].set_visible(False)

# ── 3. Adam weight trajectories ───────────────────────────────
# Bottom row, first two grid columns.
ax3 = fig.add_subplot(gs[1, :2])
ax3.set_facecolor(BG)
# Dashed reference line at the target weight.
ax3.axhline(1.0, color=DARK, lw=1, ls="--", alpha=0.3, label="True weight = 1.0")

# One line per token: column i of adam_history plotted against the step index.
# NOTE(review): assumes adam_history is (N_STEPS, n_tokens) and FREQ is
# indexable per token -- confirm against the code that builds them.
for i, (token, color) in enumerate(zip(TOKENS, TOKEN_COLORS)):
    ax3.plot(steps, adam_history[:, i], color=color, lw=1.8,
             label=f"{token} (freq={FREQ[i]:.3f})")

ax3.set_title("Adam -- Weight Trajectories\nRare tokens converge by way of variance normalization", fontsize=11, color=DARK)
ax3.set_xlabel("Coaching Step", fontsize=9)
ax3.set_ylabel("Realized Weight", fontsize=9)
ax3.legend(fontsize=8, loc="right")
# Same y-range as the SGD panel so the two trajectory plots compare directly.
ax3.set_ylim(-0.3, 1.6)
ax3.spines[["top", "right"]].set_visible(False)

# The arrow targets the last recorded value of column 5, i.e. the sixth
# series (presumably the rarest token, per the colour ramp -- verify).
ax3.annotate(
    "Uncommon tokens converge\ndespite sparse gradients",
    xy=(N_STEPS * 0.95, adam_history[-1, 5]),
    xytext=(N_STEPS * 0.60, 0.3),
    fontsize=8.5, color="#27ae60",
    arrowprops=dict(arrowstyle="->", color="#27ae60", lw=1.2),
    bbox=dict(boxstyle="round,pad=0.3", facecolor="#f0fff4", edgecolor="#27ae60", alpha=0.85),
)

# ── 4. Effective LR vs frequency ─────────────────────────────
# Bottom row, third grid column.
ax4 = fig.add_subplot(gs[1, 2])
ax4.set_facecolor(BG)

# One marker per token, coloured with the same ramp as the trajectory panels.
ax4.scatter(FREQ, effective_lr, c=TOKEN_COLORS, s=120, zorder=5, edgecolors="white", lw=1.5)
for i, token in enumerate(TOKENS):
    # Label each marker, nudged (6, 4) points away so the text clears the dot.
    ax4.annotate(token, (FREQ[i], effective_lr[i]),
                 textcoords="offset points", xytext=(6, 4), fontsize=7.5, color=TOKEN_COLORS[i])

# Horizontal reference at the nominal learning rate, with its label placed
# 5% above the line.
ax4.axhline(LR, color=DARK, lw=1, ls="--", alpha=0.4)
ax4.text(0.5, LR * 1.05, f"Nominal LR = {LR}", fontsize=8, color=DARK, alpha=0.6)

ax4.set_xscale("log")
ax4.set_yscale("log")
ax4.set_xlabel("Token Frequency (log scale)", fontsize=9)
ax4.set_ylabel("Adam Effective LR  lr/√v̂  (log scale)", fontsize=9)
ax4.set_title("Adam's Automated Equalizer\nRare tokens get amplified LR", fontsize=11, color=DARK)
ax4.spines[["top", "right"]].set_visible(False)

# Write the PNG before show(): saving first guarantees the file is written
# from the fully drawn figure regardless of how the GUI backend handles the
# window afterwards.
plt.savefig("sgd_vs_adam.png", dpi=150, bbox_inches="tight", facecolor=BG)
plt.show()

Von admin

Schreibe einen Kommentar

Deine E-Mail-Adresse wird nicht veröffentlicht. Erforderliche Felder sind mit * markiert.