% Record exported from the NII Institutional Repository (key is the OAI identifier).
% NOTE(review): inproceedings entries require a `booktitle`; the exported record
% does not carry one -- TODO confirm the venue and add it.
% The abstract lives in `abstract` (not `note`) so styles do not print it in the
% reference list.
@inproceedings{oai:repository.nii.ac.jp:02001350,
  author    = {Sakai, Tetsuya},
  title     = {Fairness-based Evaluation of Conversational Search: A Pilot Study},
  publisher = {NII Institutional Repository},
  year      = {2023},
  month     = dec,
  abstract  = {NTCIR-17 introduced the FairWeb-1 task, which evaluated web page
               rankings in terms of both relevance and group fairness. The
               present study shows how their evaluation framework can be
               extended for the evaluation of multi-turn, textual
               conversational search systems. By using the full test topic set
               of FairWeb-1 to harvest actual user-system conversations from
               the New Bing and Google Bard, we demonstrate how a series of
               system turns can be evaluated using our evaluation framework,
               which we call GFRC (Group Fairness and Relevance of
               Conversations). In addition, based on observations from our
               pilot experiment, we briefly discuss a few open questions in
               human-in-the-loop evaluation of conversational search in
               general.},
}