% Hagele et al. (2024): scaling laws and compute-optimal training beyond fixed
% training durations. The venue is a conference proceedings series, so it is
% stored in booktitle of an inproceedings entry rather than in a journal field.
% Names use "Last, First" form so the compound surnames (Ben Allal, Von Werra)
% are not split; the arXiv identifier is the one carried by the url field.
@inproceedings{hagele2024scaling,
  author        = {H{\"a}gele, Alexander and Bakouch, Elie and Kosson, Atli and Ben Allal, Loubna and Von Werra, Leandro and Jaggi, Martin},
  title         = {Scaling Laws and Compute-Optimal Training Beyond Fixed Training Durations},
  booktitle     = {Advances in Neural Information Processing Systems},
  year          = {2024},
  eprint        = {2405.18392},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2405.18392},
}