% Figure fig:hotattn: two side-by-side panels of Q-state activation strength.
%   (a) fig:attnindim   -- assets/Layer10.pdf      ("Across Head Dimensions")
%   (b) fig:attninlayer -- assets/AcrossLayer.pdf  ("Across Layers")
\begin{figure}[t]
    % \captionsetup[subfigure]{labelformat=simple, labelsep=period}
    % \renewcommand\thesubfigure{\alph{subfigure})} % change the sub-caption label format to "a)"
    \centering
    \begin{subfigure}[b]{0.47\linewidth} % PD: balance the image sizes; if both plots are alike, 0.48 can be used for both
        % PD: nudge slightly left so the visual weight does not lean to the right.
        % The trailing `%` is required: without it the end-of-line space is
        % typeset right after the negative skip and cancels most of the nudge.
        % `bp` replaces `px`, which only pdfTeX/LuaTeX understand (there it
        % defaults to 1bp), so the shift is engine-independent.
        \hspace{-3bp}%
        \includegraphics[width=\linewidth]{assets/Layer10.pdf}
        % \includegraphics[width=\linewidth]{assets/Layer10_norubost.pdf}
        \caption{Across Head Dimensions} % leaving the sub-caption empty still auto-generates the "(a)" label
        \label{fig:attnindim}
    \end{subfigure}
    \hfill % stretchable space between the two panels so they sit flush left and right
    \begin{subfigure}[b]{0.48\linewidth}
        % PD: same left nudge as in the first panel (see the note there).
        \hspace{-3bp}%
        \includegraphics[width=\linewidth]{assets/AcrossLayer.pdf}
        \caption{Across Layers}
        \label{fig:attninlayer}
    \end{subfigure}
    \caption{Q-state activation strength visualizations in LLaMA-2-7B. We compute the average L2 norm per attention head to quantify activation strength. Stronger activations are concentrated in high-indexed (\ie low-RoPE frequency) dimensions and vary across layers, highlighting both dimension-wise and layer-wise heterogeneity.}
    \label{fig:hotattn}
\end{figure}
% \py{font size of figure is too small}