1. Fix all table sizes; 2. Fix some table max/2nd-max mark errors; 3. Add figure & table caption marks; 4. Add some reference bib entries

This commit is contained in:
D调E点
2026-03-23 12:20:48 +08:00
committed by panda361
parent cacdc79ae2
commit 965f2790dc
45 changed files with 380 additions and 275 deletions

98
ref.bib
View File

@@ -2563,3 +2563,101 @@ LargeST(引过了)
archivePrefix = {arXiv},
year = {2024}
}
% Workshop paper; acronyms braced so sentence-casing .bst styles keep them.
@inproceedings{bogoychev2021not,
  title     = {Not All Parameters Are Born Equal: Attention Is Mostly What You Need},
  author    = {Bogoychev, Nikolay},
  booktitle = {Proceedings of the Fourth {BlackboxNLP} Workshop on Analyzing and Interpreting Neural Networks for {NLP}},
  pages     = {363--374},
  year      = {2021}
}
% arXiv preprint; eprint/archivePrefix added per the file's existing convention
% (journal kept for classic .bst styles that ignore eprint fields).
@article{olsson2022context,
  title         = {In-Context Learning and Induction Heads},
  author        = {Olsson, Catherine and Elhage, Nelson and Nanda, Neel and Joseph, Nicholas and DasSarma, Nova and Henighan, Tom and Mann, Ben and Askell, Amanda and Bai, Yuntao and Chen, Anna and others},
  journal       = {arXiv preprint arXiv:2209.11895},
  eprint        = {2209.11895},
  archivePrefix = {arXiv},
  year          = {2022}
}
% ICML 2019; venue name restored to its canonical capitalization.
@inproceedings{rahaman2019spectral,
  title        = {On the Spectral Bias of Neural Networks},
  author       = {Rahaman, Nasim and Baratin, Aristide and Arpit, Devansh and Draxler, Felix and Lin, Min and Hamprecht, Fred and Bengio, Yoshua and Courville, Aaron},
  booktitle    = {International Conference on Machine Learning},
  pages        = {5301--5310},
  year         = {2019},
  organization = {PMLR}
}
% ACL 2022 short paper; {P-Tuning} braced so styles cannot downcase the method name.
@inproceedings{liu2022p,
  title     = {{P-Tuning}: Prompt Tuning Can Be Comparable to Fine-tuning Across Scales and Tasks},
  author    = {Liu, Xiao and Ji, Kaixuan and Fu, Yicheng and Tam, Weng and Du, Zhengxiao and Yang, Zhilin and Tang, Jie},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  pages     = {61--68},
  year      = {2022}
}
% NeurIPS 2023; {QLoRA}/{LLMs} braced, journal name restored to canonical casing.
@article{dettmers2023qlora,
  title   = {{QLoRA}: Efficient Finetuning of Quantized {LLMs}},
  author  = {Dettmers, Tim and Pagnoni, Artidoro and Holtzman, Ari and Zettlemoyer, Luke},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {36},
  pages   = {10088--10115},
  year    = {2023}
}
% Classic multitask-learning paper; journal title restored to canonical casing.
@article{caruana1997multitask,
  title     = {Multitask Learning},
  author    = {Caruana, Rich},
  journal   = {Machine Learning},
  volume    = {28},
  number    = {1},
  pages     = {41--75},
  year      = {1997},
  publisher = {Springer}
}
% ICML 2018; {GradNorm} braced to preserve the camel-case method name.
@inproceedings{chen2018gradnorm,
  title        = {{GradNorm}: Gradient Normalization for Adaptive Loss Balancing in Deep Multitask Networks},
  author       = {Chen, Zhao and Badrinarayanan, Vijay and Lee, Chen-Yu and Rabinovich, Andrew},
  booktitle    = {International Conference on Machine Learning},
  pages        = {794--803},
  year         = {2018},
  organization = {PMLR}
}
% EACL 2024 long paper.
@inproceedings{godey2024anisotropy,
  author    = {Godey, Nathan and Clergerie, {\'E}ric and Sagot, Beno{\^\i}t},
  title     = {Anisotropy is inherent to self-attention in transformers},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2024},
  pages     = {35--48}
}
% arXiv preprint; {DARTS} is an acronym and must not be downcased.
@article{liu2018darts,
  title         = {{DARTS}: Differentiable Architecture Search},
  author        = {Liu, Hanxiao and Simonyan, Karen and Yang, Yiming},
  journal       = {arXiv preprint arXiv:1806.09055},
  eprint        = {1806.09055},
  archivePrefix = {arXiv},
  year          = {2018}
}
% arXiv preprint on the lottery ticket hypothesis.
@article{frankle2019stabilizing,
  author  = {Frankle, Jonathan and Dziugaite, Gintare Karolina and Roy, Daniel M and Carbin, Michael},
  title   = {Stabilizing the lottery ticket hypothesis},
  year    = {2019},
  journal = {arXiv preprint arXiv:1903.01611}
}
% arXiv preprint on task arithmetic for model editing.
@article{ilharco2022editing,
  author  = {Ilharco, Gabriel and Ribeiro, Marco Tulio and Wortsman, Mitchell and Gururangan, Suchin and Schmidt, Ludwig and Hajishirzi, Hannaneh and Farhadi, Ali},
  title   = {Editing models with task arithmetic},
  year    = {2022},
  journal = {arXiv preprint arXiv:2212.04089}
}
% arXiv preprint; {BERT} braced so sentence-casing styles keep the acronym.
@article{devlin2018bert,
  title         = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author        = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  journal       = {arXiv preprint arXiv:1810.04805},
  eprint        = {1810.04805},
  archivePrefix = {arXiv},
  year          = {2018}
}