1.fix table & pic title;2.fix table;

This commit is contained in:
D调E点
2026-03-23 14:32:56 +08:00
committed by panda361
parent 965f2790dc
commit 21707be041
5 changed files with 61 additions and 31 deletions

View File

@@ -218,27 +218,57 @@ CASCADE在冻结骨干的基础上使用下游任务的标准监督目标进行
CASCADE的主要超参数包括低频DCT系数20K个、小波系数10K个、空域残差专家秩$r=48$、负载均衡和正交性损失权重均为0.01。CASCADE的整体训练流程如算法~\ref{alg:ch6_cascade}所示。
%\RestyleAlgo{ruled}
%\begin{algorithm}[htp]
%\caption[CASCADE从粗到细的频谱级联适配算法]{CASCADE从粗到细的频谱级联适配算法}
%\label{alg:ch6_cascade}
%\KwIn{输入激活$\mathbf{x}$,冻结权重矩阵$\mathbf{W}_0$}
%\KwOut{适配后的输出$\mathbf{y}$}
%计算基础输出 $\mathbf{y}_0 \leftarrow \mathbf{W}_0 \mathbf{x}$\;
%\textbf{低频专家:}\;
%构建稀疏DCT频谱$\mathbf{S}_{\text{dct}}$(式~\ref{eq:ch6_dct_sparse})\;
%重建全局更新$\Delta \mathbf{W}_{\text{dct}}$(式~\ref{eq:ch6_dct_inverse})\;
%\textbf{高频专家:}\;
%构建稀疏小波细节系数$\{\mathbf{B}_b\}_{b\in\mathcal{B}}$(式~\ref{eq:ch6_wavelet_sparse})\;
%计算调制参数$(\gamma_b,\beta_b)_{b\in\mathcal{B}}$(式~\ref{eq:ch6_film_params})\;
%应用频段级FiLM调制$\tilde{\mathbf{B}}_b$(式~\ref{eq:ch6_bandwise_film})\;
%重建局部更新$\Delta \mathbf{W}_{\text{wav}}$(式~\ref{eq:ch6_wavelet_inverse})\;
%\textbf{空域残差专家:}\;
%计算残差更新$\Delta \mathbf{W}_{\text{spatial}}$(式~\ref{eq:ch6_spatial_update})\;
%\textbf{路由与聚合:}\;
%计算专家权重$\mathbf{w}$(式~\ref{eq:ch6_routing_weights})\;
%聚合更新 $\Delta \mathbf{W} \leftarrow \sum_{e=1}^{E} w_e \cdot \Delta \mathbf{W}_e$\;
%\Return{$\mathbf{y} \leftarrow \mathbf{y}_0 + \Delta \mathbf{W}\mathbf{x}$}
%\end{algorithm}
\RestyleAlgo{ruled}
\begin{algorithm}[htp]
\caption[CASCADE从粗到细的频谱级联适配算法]{CASCADE从粗到细的频谱级联适配算法}
\label{alg:ch6_cascade}
\KwIn{输入激活$\mathbf{x}$,冻结权重矩阵$\mathbf{W}_0$}
\KwOut{适配后的输出$\mathbf{y}$}
计算基础输出 $\mathbf{y}_0 \leftarrow \mathbf{W}_0 \mathbf{x}$\;
\textbf{低频专家:}\;
构建稀疏DCT频谱$\mathbf{S}_{\text{dct}}$(式~\ref{eq:ch6_dct_sparse})\;
重建全局更新$\Delta \mathbf{W}_{\text{dct}}$(式~\ref{eq:ch6_dct_inverse})\;
\textbf{高频专家:}\;
构建稀疏小波细节系数$\{\mathbf{B}_b\}_{b\in\mathcal{B}}$(式~\ref{eq:ch6_wavelet_sparse})\;
计算调制参数$(\gamma_b,\beta_b)_{b\in\mathcal{B}}$(式~\ref{eq:ch6_film_params})\;
应用频段级FiLM调制$\tilde{\mathbf{B}}_b$(式~\ref{eq:ch6_bandwise_film})\;
重建局部更新$\Delta \mathbf{W}_{\text{wav}}$(式~\ref{eq:ch6_wavelet_inverse})\;
\textbf{空域残差专家:}\;
计算残差更新$\Delta \mathbf{W}_{\text{spatial}}$(式~\ref{eq:ch6_spatial_update})\;
\textbf{路由与聚合:}\;
计算专家权重$\mathbf{w}$(式~\ref{eq:ch6_routing_weights})\;
聚合更新 $\Delta \mathbf{W} \leftarrow \sum_{e=1}^{E} w_e \cdot \Delta \mathbf{W}_e$\;
\Return{$\mathbf{y} \leftarrow \mathbf{y}_0 + \Delta \mathbf{W}\mathbf{x}$}
\caption[CASCADE从粗到细的频谱级联适配算法]{CASCADE从粗到细的频谱级联适配算法}
\label{alg:ch6_cascade_2}
\KwIn{输入激活$\mathbf{x}$,冻结权重矩阵$\mathbf{W}_0$}
\KwOut{适配后的输出$\mathbf{y}$}
计算基础输出 $\mathbf{y}_0 \leftarrow \mathbf{W}_0 \mathbf{x}$\;
\textbf{低频专家:}\;
\Indp
构建稀疏DCT频谱$\mathbf{S}_{\text{dct}}$(式~\ref{eq:ch6_dct_sparse})\;
重建全局更新$\Delta \mathbf{W}_{\text{dct}}$(式~\ref{eq:ch6_dct_inverse})\;
\Indm
\textbf{高频专家:}\;
\Indp
构建稀疏小波细节系数$\{\mathbf{B}_b\}_{b\in\mathcal{B}}$(式~\ref{eq:ch6_wavelet_sparse})\;
计算调制参数$(\gamma_b,\beta_b)_{b\in\mathcal{B}}$(式~\ref{eq:ch6_film_params})\;
应用频段级FiLM调制$\tilde{\mathbf{B}}_b$(式~\ref{eq:ch6_bandwise_film})\;
重建局部更新$\Delta \mathbf{W}_{\text{wav}}$(式~\ref{eq:ch6_wavelet_inverse})\;
\Indm
\textbf{空域残差专家:}\;
\Indp
计算残差更新$\Delta \mathbf{W}_{\text{spatial}}$(式~\ref{eq:ch6_spatial_update})\;
\Indm
\textbf{路由与聚合:}\;
\Indp
计算专家权重$\mathbf{w}$(式~\ref{eq:ch6_routing_weights})\;
聚合更新 $\Delta \mathbf{W} \leftarrow \sum_{e=1}^{E} w_e \cdot \Delta \mathbf{W}_e$\;
\Indm
\textbf{计算适配后的输出 $\mathbf{y} \leftarrow \mathbf{y}_0 + \Delta \mathbf{W}\mathbf{x}$}\;
\end{algorithm}
@@ -449,7 +479,7 @@ MESSA定位于右下象限用小型示意图展示其核心机制参数组
\toprule
\textbf{骨干} & \textbf{方法} & \textbf{BoolQ} & \textbf{PIQA} & \textbf{SIQA} & \textbf{ARC-C} & \textbf{ARC-E} & \textbf{OBQA} & \textbf{HellaSwag} & \textbf{WinoGrande} & \textbf{Micro-Avg$\uparrow$} \\
\midrule
\multirow{7}{*}{\rotatebox{90}{\textbf{Qwen3-4B}}}
\multirow{7}{*}{\textbf{Qwen3-4B}}
& LoRA & 66.88 & 82.97 & \underline{73.59} & 86.86 & 92.21 & \underline{83.60} & 85.37 & \underline{68.75} & 81.27 \\
& AdaLoRA & \underline{67.34} & 82.64 & 73.44 & 87.03 & 92.89 & 82.00 & 79.99 & 67.88 & 78.89 \\
& BONE & 66.15 & 81.61 & 72.62 & 85.24 & 92.55 & 75.40 & 78.85 & 68.11 & 77.78 \\
@@ -458,7 +488,7 @@ MESSA定位于右下象限用小型示意图展示其核心机制参数组
& FlyLoRA & 66.51 & \underline{83.35} & 73.54 & \underline{87.20} & 93.06 & 78.20 & \underline{85.63} & 68.35 & \underline{81.33} \\
& \textbf{CASCADE} & \textbf{67.74} & \textbf{83.46} & \textbf{75.49} & \textbf{87.88} & \textbf{93.64} & \textbf{86.40} & \textbf{85.75} & \textbf{71.98} & \textbf{82.22*} \\
\midrule
\multirow{7}{*}{\rotatebox{90}{\textbf{LLaMA3.2-3B}}}
\multirow{7}{*}{\textbf{LLaMA3.2-3B}}
& LoRA & 61.41 & 78.62 & 66.79 & 68.26 & 84.05 & 70.20 & 79.49 & \underline{56.35} & \underline{74.05} \\
& AdaLoRA & \underline{61.53} & 78.89 & 67.04 & \underline{69.71} & 83.63 & 69.60 & 79.31 & 54.78 & 73.96 \\
& BONE & 60.61 & 76.17 & 66.53 & 67.24 & 79.88 & 63.20 & 79.28 & 50.04 & 72.61 \\
@@ -467,7 +497,7 @@ MESSA定位于右下象限用小型示意图展示其核心机制参数组
& FlyLoRA & 59.02 & 78.94 & \underline{67.14} & 67.58 & \underline{84.22} & \underline{71.80} & \underline{79.66} & 52.49 & 73.64 \\
& \textbf{CASCADE} & \textbf{62.66} & \textbf{80.69} & \textbf{67.40} & \textbf{69.97} & \textbf{84.68} & \textbf{73.60} & \textbf{79.94} & \textbf{62.59} & \textbf{75.25*} \\
\midrule
\multirow{7}{*}{\rotatebox{90}{\textbf{Gemma3-4B}}}
\multirow{7}{*}{\textbf{Gemma3-4B}}
& LoRA & 64.34 & 78.07 & \underline{70.21} & 75.26 & \underline{87.37} & 75.60 & \underline{77.97} & \underline{61.88} & \underline{75.21} \\
& AdaLoRA & \underline{64.86} & \underline{79.16} & 69.91 & 75.68 & 86.87 & 72.00 & 77.19 & 61.17 & 74.84 \\
& BONE & 63.67 & 78.35 & 69.19 & \underline{76.11} & 86.95 & 70.60 & 73.97 & 48.22 & 72.37 \\