by S. S. Ramesh, Y. Hu, I. Chaimalas, V. Mehta, P. G. Sessa, H. B. Ammar, I. Bogunovic
Reference:
Group Robust Preference Optimization in Reward-free RLHF. S. S. Ramesh, Y. Hu, I. Chaimalas, V. Mehta, P. G. Sessa, H. B. Ammar, I. Bogunovic. In Proc. Neural Information Processing Systems (NeurIPS), 2024.
Bibtex Entry:
@inproceedings{ramesh2024group,
	author = {Ramesh, Shyam Sundhar and Hu, Yifan and Chaimalas, Iason and Mehta, Viraj and Sessa, Pier Giuseppe and Ammar, Haitham Bou and Bogunovic, Ilija},
	booktitle = {Proc. Neural Information Processing Systems (NeurIPS)},
	month = {December},
	pdf = {https://arxiv.org/pdf/2405.20304},
	title = {Group Robust Preference Optimization in Reward-free RLHF},
	year = {2024}}