1. Fix all table sizes; 2. Fix some table max/second-max mark errors; 3. Add figure & table caption marks; 4. Add some reference bib entries
This commit is contained in:
98
ref.bib
98
ref.bib
@@ -2563,3 +2563,101 @@ LargeST(引过了)
|
||||
archivePrefix = {arXiv},
|
||||
year = {2024}
|
||||
}
|
||||
|
||||
@inproceedings{bogoychev2021not,
  title     = {Not All Parameters Are Born Equal: Attention Is Mostly What You Need},
  author    = {Bogoychev, Nikolay},
  booktitle = {Proceedings of the Fourth {BlackboxNLP} Workshop on Analyzing and Interpreting Neural Networks for {NLP}},
  pages     = {363--374},
  year      = {2021},
}
|
||||
|
||||
@article{olsson2022context,
  author  = {Olsson, Catherine and Elhage, Nelson and Nanda, Neel and Joseph, Nicholas and DasSarma, Nova and Henighan, Tom and Mann, Ben and Askell, Amanda and Bai, Yuntao and Chen, Anna and others},
  title   = {In-context learning and induction heads},
  journal = {arXiv preprint arXiv:2209.11895},
  year    = {2022},
}
|
||||
|
||||
@inproceedings{rahaman2019spectral,
  title        = {On the Spectral Bias of Neural Networks},
  author       = {Rahaman, Nasim and Baratin, Aristide and Arpit, Devansh and Draxler, Felix and Lin, Min and Hamprecht, Fred and Bengio, Yoshua and Courville, Aaron},
  booktitle    = {International Conference on Machine Learning},
  pages        = {5301--5310},
  year         = {2019},
  organization = {PMLR},
}
|
||||
|
||||
@inproceedings{liu2022p,
  title     = {{P-Tuning}: Prompt Tuning Can Be Comparable to Fine-tuning Across Scales and Tasks},
  author    = {Liu, Xiao and Ji, Kaixuan and Fu, Yicheng and Tam, Weng and Du, Zhengxiao and Yang, Zhilin and Tang, Jie},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  pages     = {61--68},
  year      = {2022},
}
|
||||
|
||||
@article{dettmers2023qlora,
  title   = {{QLoRA}: Efficient Finetuning of Quantized {LLMs}},
  author  = {Dettmers, Tim and Pagnoni, Artidoro and Holtzman, Ari and Zettlemoyer, Luke},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {36},
  pages   = {10088--10115},
  year    = {2023},
}
|
||||
|
||||
@article{caruana1997multitask,
  title     = {Multitask Learning},
  author    = {Caruana, Rich},
  journal   = {Machine Learning},
  volume    = {28},
  number    = {1},
  pages     = {41--75},
  year      = {1997},
  publisher = {Springer},
}
|
||||
|
||||
@inproceedings{chen2018gradnorm,
  title        = {{GradNorm}: Gradient Normalization for Adaptive Loss Balancing in Deep Multitask Networks},
  author       = {Chen, Zhao and Badrinarayanan, Vijay and Lee, Chen-Yu and Rabinovich, Andrew},
  booktitle    = {International Conference on Machine Learning},
  pages        = {794--803},
  year         = {2018},
  organization = {PMLR},
}
|
||||
|
||||
@inproceedings{godey2024anisotropy,
  author    = {Godey, Nathan and Clergerie, {\'E}ric and Sagot, Beno{\^\i}t},
  title     = {Anisotropy is inherent to self-attention in transformers},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {35--48},
  year      = {2024},
}
|
||||
|
||||
@article{liu2018darts,
  title   = {{DARTS}: Differentiable Architecture Search},
  author  = {Liu, Hanxiao and Simonyan, Karen and Yang, Yiming},
  journal = {arXiv preprint arXiv:1806.09055},
  year    = {2018},
}
|
||||
|
||||
@article{frankle2019stabilizing,
  author  = {Frankle, Jonathan and Dziugaite, Gintare Karolina and Roy, Daniel M and Carbin, Michael},
  title   = {Stabilizing the lottery ticket hypothesis},
  journal = {arXiv preprint arXiv:1903.01611},
  year    = {2019},
}
|
||||
|
||||
@article{ilharco2022editing,
  author  = {Ilharco, Gabriel and Ribeiro, Marco Tulio and Wortsman, Mitchell and Gururangan, Suchin and Schmidt, Ludwig and Hajishirzi, Hannaneh and Farhadi, Ali},
  title   = {Editing models with task arithmetic},
  journal = {arXiv preprint arXiv:2212.04089},
  year    = {2022},
}
|
||||
|
||||
@article{devlin2018bert,
  title   = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  journal = {arXiv preprint arXiv:1810.04805},
  year    = {2018},
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user