1.fix table & pic title;2.fix table;
This commit is contained in:
74
chap06.tex
74
chap06.tex
@@ -218,27 +218,57 @@ CASCADE在冻结骨干的基础上使用下游任务的标准监督目标进行
|
||||
|
||||
CASCADE的主要超参数包括:低频DCT系数20K个、小波系数10K个、空域残差专家秩$r=48$、负载均衡和正交性损失权重均为0.01。CASCADE的整体训练流程如算法~\ref{alg:ch6_cascade_2}所示。
|
||||
|
||||
%\RestyleAlgo{ruled}
|
||||
%\begin{algorithm}[htp]
|
||||
%\caption[CASCADE:从粗到细的频谱级联适配算法]{CASCADE:从粗到细的频谱级联适配算法}
|
||||
%\label{alg:ch6_cascade}
|
||||
%\KwIn{输入激活$\mathbf{x}$,冻结权重矩阵$\mathbf{W}_0$}
|
||||
%\KwOut{适配后的输出$\mathbf{y}$}
|
||||
%计算基础输出 $\mathbf{y}_0 \leftarrow \mathbf{W}_0 \mathbf{x}$\;
|
||||
%\textbf{低频专家:}\;
|
||||
%构建稀疏DCT频谱$\mathbf{S}_{\text{dct}}$(式~\ref{eq:ch6_dct_sparse})\;
|
||||
%重建全局更新$\Delta \mathbf{W}_{\text{dct}}$(式~\ref{eq:ch6_dct_inverse})\;
|
||||
%\textbf{高频专家:}\;
|
||||
%构建稀疏小波细节系数$\{\mathbf{B}_b\}_{b\in\mathcal{B}}$(式~\ref{eq:ch6_wavelet_sparse})\;
|
||||
%计算调制参数$(\gamma_b,\beta_b)_{b\in\mathcal{B}}$(式~\ref{eq:ch6_film_params})\;
|
||||
%应用频段级FiLM调制$\tilde{\mathbf{B}}_b$(式~\ref{eq:ch6_bandwise_film})\;
|
||||
%重建局部更新$\Delta \mathbf{W}_{\text{wav}}$(式~\ref{eq:ch6_wavelet_inverse})\;
|
||||
%\textbf{空域残差专家:}\;
|
||||
%计算残差更新$\Delta \mathbf{W}_{\text{spatial}}$(式~\ref{eq:ch6_spatial_update})\;
|
||||
%\textbf{路由与聚合:}\;
|
||||
%计算专家权重$\mathbf{w}$(式~\ref{eq:ch6_routing_weights})\;
|
||||
%聚合更新 $\Delta \mathbf{W} \leftarrow \sum_{e=1}^{E} w_e \cdot \Delta \mathbf{W}_e$\;
|
||||
%\Return{$\mathbf{y} \leftarrow \mathbf{y}_0 + \Delta \mathbf{W}\mathbf{x}$}
|
||||
%\end{algorithm}
|
||||
\RestyleAlgo{ruled}
|
||||
\begin{algorithm}[htp]
|
||||
\caption[CASCADE:从粗到细的频谱级联适配算法]{CASCADE:从粗到细的频谱级联适配算法}
|
||||
\label{alg:ch6_cascade}
|
||||
\KwIn{输入激活$\mathbf{x}$,冻结权重矩阵$\mathbf{W}_0$}
|
||||
\KwOut{适配后的输出$\mathbf{y}$}
|
||||
计算基础输出 $\mathbf{y}_0 \leftarrow \mathbf{W}_0 \mathbf{x}$\;
|
||||
\textbf{低频专家:}\;
|
||||
构建稀疏DCT频谱$\mathbf{S}_{\text{dct}}$(式~\ref{eq:ch6_dct_sparse})\;
|
||||
重建全局更新$\Delta \mathbf{W}_{\text{dct}}$(式~\ref{eq:ch6_dct_inverse})\;
|
||||
\textbf{高频专家:}\;
|
||||
构建稀疏小波细节系数$\{\mathbf{B}_b\}_{b\in\mathcal{B}}$(式~\ref{eq:ch6_wavelet_sparse})\;
|
||||
计算调制参数$(\gamma_b,\beta_b)_{b\in\mathcal{B}}$(式~\ref{eq:ch6_film_params})\;
|
||||
应用频段级FiLM调制$\tilde{\mathbf{B}}_b$(式~\ref{eq:ch6_bandwise_film})\;
|
||||
重建局部更新$\Delta \mathbf{W}_{\text{wav}}$(式~\ref{eq:ch6_wavelet_inverse})\;
|
||||
\textbf{空域残差专家:}\;
|
||||
计算残差更新$\Delta \mathbf{W}_{\text{spatial}}$(式~\ref{eq:ch6_spatial_update})\;
|
||||
\textbf{路由与聚合:}\;
|
||||
计算专家权重$\mathbf{w}$(式~\ref{eq:ch6_routing_weights})\;
|
||||
聚合更新 $\Delta \mathbf{W} \leftarrow \sum_{e=1}^{E} w_e \cdot \Delta \mathbf{W}_e$\;
|
||||
\Return{$\mathbf{y} \leftarrow \mathbf{y}_0 + \Delta \mathbf{W}\mathbf{x}$}
|
||||
\caption[CASCADE:从粗到细的频谱级联适配算法]{CASCADE:从粗到细的频谱级联适配算法}
|
||||
\label{alg:ch6_cascade_2}
|
||||
\KwIn{输入激活$\mathbf{x}$,冻结权重矩阵$\mathbf{W}_0$}
|
||||
\KwOut{适配后的输出$\mathbf{y}$}
|
||||
计算基础输出 $\mathbf{y}_0 \leftarrow \mathbf{W}_0 \mathbf{x}$\;
|
||||
\textbf{低频专家:}\;
|
||||
\Indp
|
||||
构建稀疏DCT频谱$\mathbf{S}_{\text{dct}}$(式~\ref{eq:ch6_dct_sparse})\;
|
||||
重建全局更新$\Delta \mathbf{W}_{\text{dct}}$(式~\ref{eq:ch6_dct_inverse})\;
|
||||
\Indm
|
||||
\textbf{高频专家:}\;
|
||||
\Indp
|
||||
构建稀疏小波细节系数$\{\mathbf{B}_b\}_{b\in\mathcal{B}}$(式~\ref{eq:ch6_wavelet_sparse})\;
|
||||
计算调制参数$(\gamma_b,\beta_b)_{b\in\mathcal{B}}$(式~\ref{eq:ch6_film_params})\;
|
||||
应用频段级FiLM调制$\tilde{\mathbf{B}}_b$(式~\ref{eq:ch6_bandwise_film})\;
|
||||
重建局部更新$\Delta \mathbf{W}_{\text{wav}}$(式~\ref{eq:ch6_wavelet_inverse})\;
|
||||
\Indm
|
||||
\textbf{空域残差专家:}\;
|
||||
\Indp
|
||||
计算残差更新$\Delta \mathbf{W}_{\text{spatial}}$(式~\ref{eq:ch6_spatial_update})\;
|
||||
\Indm
|
||||
\textbf{路由与聚合:}\;
|
||||
\Indp
|
||||
计算专家权重$\mathbf{w}$(式~\ref{eq:ch6_routing_weights})\;
|
||||
聚合更新 $\Delta \mathbf{W} \leftarrow \sum_{e=1}^{E} w_e \cdot \Delta \mathbf{W}_e$\;
|
||||
\Indm
|
||||
\textbf{计算适配后的输出 $\mathbf{y} \leftarrow \mathbf{y}_0 + \Delta \mathbf{W}\mathbf{x}$}\;
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
@@ -449,7 +479,7 @@ MESSA定位于右下象限,用小型示意图展示其核心机制:参数组
|
||||
\toprule
|
||||
\textbf{骨干} & \textbf{方法} & \textbf{BoolQ} & \textbf{PIQA} & \textbf{SIQA} & \textbf{ARC-C} & \textbf{ARC-E} & \textbf{OBQA} & \textbf{HellaSwag} & \textbf{WinoGrande} & \textbf{Micro-Avg$\uparrow$} \\
|
||||
\midrule
|
||||
\multirow{7}{*}{\rotatebox{90}{\textbf{Qwen3-4B}}}
|
||||
\multirow{7}{*}{\textbf{Qwen3-4B}}
|
||||
& LoRA & 66.88 & 82.97 & \underline{73.59} & 86.86 & 92.21 & \underline{83.60} & 85.37 & \underline{68.75} & 81.27 \\
|
||||
& AdaLoRA & \underline{67.34} & 82.64 & 73.44 & 87.03 & 92.89 & 82.00 & 79.99 & 67.88 & 78.89 \\
|
||||
& BONE & 66.15 & 81.61 & 72.62 & 85.24 & 92.55 & 75.40 & 78.85 & 68.11 & 77.78 \\
|
||||
@@ -458,7 +488,7 @@ MESSA定位于右下象限,用小型示意图展示其核心机制:参数组
|
||||
& FlyLoRA & 66.51 & \underline{83.35} & 73.54 & \underline{87.20} & 93.06 & 78.20 & \underline{85.63} & 68.35 & \underline{81.33} \\
|
||||
& \textbf{CASCADE} & \textbf{67.74} & \textbf{83.46} & \textbf{75.49} & \textbf{87.88} & \textbf{93.64} & \textbf{86.40} & \textbf{85.75} & \textbf{71.98} & \textbf{82.22*} \\
|
||||
\midrule
|
||||
\multirow{7}{*}{\rotatebox{90}{\textbf{LLaMA3.2-3B}}}
|
||||
\multirow{7}{*}{\textbf{LLaMA3.2-3B}}
|
||||
& LoRA & 61.41 & 78.62 & 66.79 & 68.26 & 84.05 & 70.20 & 79.49 & \underline{56.35} & \underline{74.05} \\
|
||||
& AdaLoRA & \underline{61.53} & 78.89 & 67.04 & \underline{69.71} & 83.63 & 69.60 & 79.31 & 54.78 & 73.96 \\
|
||||
& BONE & 60.61 & 76.17 & 66.53 & 67.24 & 79.88 & 63.20 & 79.28 & 50.04 & 72.61 \\
|
||||
@@ -467,7 +497,7 @@ MESSA定位于右下象限,用小型示意图展示其核心机制:参数组
|
||||
& FlyLoRA & 59.02 & 78.94 & \underline{67.14} & 67.58 & \underline{84.22} & \underline{71.80} & \underline{79.66} & 52.49 & 73.64 \\
|
||||
& \textbf{CASCADE} & \textbf{62.66} & \textbf{80.69} & \textbf{67.40} & \textbf{69.97} & \textbf{84.68} & \textbf{73.60} & \textbf{79.94} & \textbf{62.59} & \textbf{75.25*} \\
|
||||
\midrule
|
||||
\multirow{7}{*}{\rotatebox{90}{\textbf{Gemma3-4B}}}
|
||||
\multirow{7}{*}{\textbf{Gemma3-4B}}
|
||||
& LoRA & 64.34 & 78.07 & \underline{70.21} & 75.26 & \underline{87.37} & 75.60 & \underline{77.97} & \underline{61.88} & \underline{75.21} \\
|
||||
& AdaLoRA & \underline{64.86} & \underline{79.16} & 69.91 & 75.68 & 86.87 & 72.00 & 77.19 & 61.17 & 74.84 \\
|
||||
& BONE & 63.67 & 78.35 & 69.19 & \underline{76.11} & 86.95 & 70.60 & 73.97 & 48.22 & 72.37 \\
|
||||
|
||||
Reference in New Issue
Block a user