LLMver_v1
assets/5_bias_comparison.pdf   (new binary file)
assets/5_modulation_range.pdf  (new binary file)
assets/5_rope_response.pdf     (new binary file)
@@ -1,25 +0,0 @@
\begin{figure}[t]
    % \captionsetup[subfigure]{labelformat=simple, labelsep=period}
    % \renewcommand\thesubfigure{\alph{subfigure})} % change the subcaption label format to "a)"
    \centering
    \begin{subfigure}[b]{0.47\linewidth} % PD: balance the figure sizes; if the two figures match, both can use 0.48
        \hspace{-3px} % PD: nudge left a bit so the layout is not right-heavy
        \includegraphics[width=\linewidth]{assets/Layer10.pdf}
        % \includegraphics[width=\linewidth]{assets/Layer10_norubost.pdf}
        \caption{Across Head Dimensions} % leaving the subcaption empty auto-generates (a)
        \label{fig:attnindim}
    \end{subfigure}
    \hfill % stretchable space between the two subfigures so they align flush left and right
    \begin{subfigure}[b]{0.48\linewidth}
        \hspace{-3px} % PD: nudge left a bit so the layout is not right-heavy
        \includegraphics[width=\linewidth]{assets/AcrossLayer.pdf}
        \caption{Across Layers}
        \label{fig:attninlayer}
    \end{subfigure}
    \caption{Q-state activation strength visualizations in LLaMA-2-7B.
    We compute the average L2 norm per attention head to quantify activation strength.
    Stronger activations are concentrated in high-indexed (\ie low-RoPE frequency) dimensions and vary across layers, highlighting both dimension-wise and layer-wise heterogeneity.
    }
    \label{fig:hotattn}
\end{figure}
% \py{font size of figure is too small}
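For reference, one plausible formalization of the averaging described in the deleted caption (a sketch only; the layer index $\ell$, token count $T$, and the per-dimension variant are notation introduced here, not taken from the repository):

% Sketch, not the repository's plotting code: token-averaged L2 norm of the
% Q-states of head h in layer l, and a per-coordinate analogue that would
% underlie the "Across Head Dimensions" panel.
\begin{equation*}
s^{(\ell)}_{h} \;=\; \frac{1}{T}\sum_{t=1}^{T}\bigl\lVert \mathbf{q}^{(\ell,h)}_{t} \bigr\rVert_{2},
\qquad
s^{(\ell)}_{h,j} \;=\; \frac{1}{T}\sum_{t=1}^{T}\bigl\lvert q^{(\ell,h)}_{t,j} \bigr\rvert,
\end{equation*}
where $t$ ranges over tokens, $h$ over attention heads, and $j$ over head dimensions.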
@@ -1,7 +0,0 @@
% \begin{figure*}[ht]
%     \centering
%     \includegraphics[width=0.7\linewidth]{assets/model2.pdf}
%     \caption{MESSA framework with shared--specific sparse updates. Sparse structures are learned via budget-aware soft gating and overlap regularization, and hardened through a soft-to-hard training process under a unified parameter budget.}
%     % \caption{The architecture of CAM and HyCAM framework. HyCAM applies a hybrid CAM mechanism to the output of the Attention module within each Transformer block, while the backbone LLM remains frozen. Specifically, HyCAM integrates a shared, full-parameter CAM module and multiple lightweight Specialized CAMs for common and task-specific knowledge.} % with a dynamic routing strategy. % adaptively coordinates the contributions of these specialized modules.
%     \label{fig:framework}
% \end{figure*}
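The commented-out caption above describes the training scheme only verbally. A purely illustrative way to write a budget-aware soft gate with an overlap penalty (the gates $g^{(m)}_{i}$, budgets $B_{m}$, and weights $\lambda_{b}$, $\lambda_{o}$ are assumptions introduced here, not MESSA's actual formulation):

% Illustrative only: sigmoid gates over candidate parameter groups, a squared
% budget penalty toward a per-module target budget B_m, and an overlap penalty
% discouraging the shared and specific modules from selecting the same group.
\begin{equation*}
g^{(m)}_{i} = \sigma\bigl(\alpha^{(m)}_{i}\bigr), \qquad
\mathcal{L} = \mathcal{L}_{\mathrm{task}}
+ \lambda_{b}\sum_{m}\Bigl(\sum_{i} g^{(m)}_{i} - B_{m}\Bigr)^{2}
+ \lambda_{o}\sum_{i} g^{(\mathrm{shared})}_{i}\, g^{(\mathrm{specific})}_{i},
\end{equation*}
where the soft gates would be hardened to binary masks (e.g., by thresholding) late in training, in the spirit of the caption's "soft-to-hard" description.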