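by Shyam Sundhar Ramesh, Yifan Hu, Iason Chaimalas, Viraj Mehta, Pier Giuseppe Sessa, Haitham Bou Ammar, Ilija Bogunovic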
Reference:
Group Robust Preference Optimization in Reward-free RLHF. S. S. Ramesh, Y. Hu, I. Chaimalas, V. Mehta, P. G. Sessa, H. B. Ammar, I. Bogunovic. In arXiv preprint arXiv:2405.20304, 2024.
Bibtex Entry:
@article{ramesh2024group,
  title={Group Robust Preference Optimization in Reward-free RLHF},
  author={Ramesh, Shyam Sundhar and Hu, Yifan and Chaimalas, Iason and Mehta, Viraj and Sessa, Pier Giuseppe and Ammar, Haitham Bou and Bogunovic, Ilija},
  journal={arXiv preprint arXiv:2405.20304},
  year={2024}
}