Spaces:

yutohub
/

japanese-chatbot-arena-leaderboard

Running

App Files Files Community

japanese-chatbot-arena-leaderboard / app.py

yutohub

Add data source

efece16 verified 10 months ago

raw

history blame

13.2 kB

	import json
	import os
	import random
	import time

	import pandas as pd
	import requests
	import streamlit as st


	# Static configuration: model metadata and the question set.
	# Both files are opened as UTF-8 explicitly so that Japanese text decodes the
	# same way regardless of the host's locale encoding.
	with open("models_info.json", "r", encoding="utf-8") as json_file:
	    # Maps a model id to a sequence; the rest of this file reads index 0 as
	    # the display name, index 1 as the license and index 2 as the organization.
	    MODELS_INFO = json.load(json_file)
	with open("test.csv", "r", encoding="utf-8") as file:
	    # The "input" column of this table is used as the question text.
	    QUESTION_DF = pd.read_csv(file)
	# Ids of every model taking part in the arena.
	MODELS = list(MODELS_INFO.keys())
	# Question ids are drawn from 1..NUM_QUESTION (inclusive).
	NUM_QUESTION = 100


	# Fetch the ranking
	def get_leaderboard():
	    """Fetch the current ranking from the leaderboard endpoint.

	    Returns:
	        The decoded JSON body of the response on success, or the string
	        ``"Error"`` when the request fails, times out, comes back with an
	        HTTP error status, or the body is not valid JSON.
	    """
	    try:
	        # NOTE(review): 'DARABASE_URL' looks like a misspelling of
	        # DATABASE_URL; kept as-is because the deployment presumably defines
	        # the variable under this exact name.
	        # The timeout keeps a stalled server from hanging the app forever.
	        response = requests.get(os.environ['DARABASE_URL'], timeout=30)
	        # Treat 4xx/5xx responses as failures instead of parsing their body
	        # as if it were a ranking.
	        response.raise_for_status()
	        return response.json()
	    except Exception as e:
	        print(f"An unexpected error occurred: {e}")
	        return "Error"

	# Build the leaderboard
	def create_leaderboard_df():
	    """Build the leaderboard table shown in the UI.

	    Returns:
	        A DataFrame with one row per ranking entry (rank, model display
	        name, Elo rating, organization, license), in the order returned by
	        ``get_leaderboard()``. When the ranking could not be fetched, or is
	        not a list, an error is shown and an empty DataFrame is returned.
	    """
	    ranking = get_leaderboard()
	    # Anything other than a list (the "Error" sentinel, or e.g. an error
	    # object sent by the server) cannot be rendered as a ranking.
	    if not isinstance(ranking, list):
	        st.error("リーダーボードを取得できませんでした。")
	        print("リーダーボードを取得できませんでした。")  # log output
	        return pd.DataFrame()
	    ranks, model_names, ratings, organizations, licenses = [], [], [], [], []
	    for rank, entry in enumerate(ranking, start=1):
	        model_id = entry["model"]
	        info = MODELS_INFO.get(model_id)
	        if info is None:
	            # The server knows a model that models_info.json does not:
	            # show its raw id rather than crashing the whole page.
	            display_name, license_name, organization = model_id, "", ""
	        else:
	            # MODELS_INFO values: [0] display name, [1] license, [2] organization.
	            display_name, license_name, organization = info[0], info[1], info[2]
	        ranks.append(rank)
	        model_names.append(display_name)
	        ratings.append(entry["rating"])
	        organizations.append(organization)
	        licenses.append(license_name)
	    return pd.DataFrame({
	        "ランク": ranks,
	        "🤖 モデル": model_names,
	        "⭐️ Eloレーティング": ratings,
	        "🏢 組織": organizations,
	        "📃 ライセンス": licenses,
	    })

	# Fetch an answer from the server (cached part)
	@st.cache_data
	def _fetch_answer(model_name, question_id):
	    """Request one model's answer to one question; raise on any failure.

	    Only successful results are returned from this cached function, so a
	    transient failure is never stored in the cache.
	    """
	    params = {'modelName': model_name, 'questionId': question_id}
	    # The timeout keeps a stalled server from hanging the app forever.
	    response = requests.get(os.environ['ANSWER_URL'], params=params, timeout=30)
	    response.raise_for_status()
	    return response.json()["answer"]


	# Fetch an answer from the server
	def get_answer(model_name, question_id):
	    """Return the answer of ``model_name`` to question ``question_id``.

	    Args:
	        model_name: Model id, sent as the ``modelName`` query parameter.
	        question_id: Question id, sent as the ``questionId`` query parameter.

	    Returns:
	        The ``"answer"`` field of the JSON response, or the string
	        ``"Error"`` when the request fails. Successful answers are cached
	        per (model_name, question_id); failures are not cached, so a later
	        call retries the request.
	    """
	    try:
	        return _fetch_answer(model_name, question_id)
	    except Exception as e:
	        print(f"An unexpected error occurred: {e}")
	        return "Error"

	# Send the user's vote to the server
	def send_choice(question_id, model_a, model_b, winner, language):
	    """POST one vote to the server as a JSON document.

	    Args:
	        question_id: Id of the question that was shown.
	        model_a: Id of the model shown as "A".
	        model_b: Id of the model shown as "B".
	        winner: The user's choice (passed through unchanged).
	        language: Language tag stored with the vote.

	    Returns:
	        The response body text on success, or the string ``"Error"`` when
	        any argument is empty, the request fails or times out, or the
	        server answers with an HTTP error status.
	    """
	    # Refuse to send an incomplete vote.
	    if not all((question_id, model_a, model_b, winner, language)):
	        st.error("データが入力されていないため、回答を送信できませんでした。")
	        print("質問と回答を取得してください。")  # log output
	        return "Error"
	    payload = {
	        "question_id": question_id,
	        "model_a": model_a,
	        "model_b": model_b,
	        "winner": winner,
	        "language": language,
	        "tstamp": time.time(),
	    }
	    try:
	        # NOTE(review): 'DARABASE_URL' looks like a misspelling of
	        # DATABASE_URL; kept as-is because the deployment presumably defines
	        # the variable under this exact name.
	        response = requests.post(
	            os.environ['DARABASE_URL'],
	            headers={'Content-Type': 'application/json'},
	            data=json.dumps(payload),
	            timeout=30,  # do not hang forever on a stalled server
	        )
	        # Without this check a 4xx/5xx response would be reported to the
	        # user as a successful submission.
	        response.raise_for_status()
	        return response.text
	    except Exception as e:
	        print(f"An unexpected error occurred: {e}")
	        return "Error"


	### Callback Functions ###
	# Initialize the session state
	def handle_init_state():
	    """Give every session-state key used by the app an initial value.

	    Keys that already exist in ``st.session_state`` are left untouched, so
	    calling this on every script run does not reset an ongoing session.
	    """
	    # Built on each call so that the list defaults are fresh objects.
	    initial_values = (
	        ("chat_history_a", []),
	        ("chat_history_b", []),
	        ("question_id", None),
	        ("model_a", None),
	        ("model_b", None),
	        ("question", None),
	        # Whether a question is currently loaded (drives button state)
	        ("question_loaded", False),
	        # Whether the vote has already been submitted
	        ("answer_sent", False),
	    )
	    for key, value in initial_values:
	        if key not in st.session_state:
	            st.session_state[key] = value

	# Fetch a question and the two answers
	def handle_init_question():
	    """Button callback: pick a random question and two random models.

	    Stores the question, the two model ids and both chat histories in
	    ``st.session_state``. If a question is already loaded the call is
	    treated as a repeated click: the loaded question is discarded and an
	    error is shown. If either answer cannot be fetched, nothing is left
	    loaded, so a failed fetch can never be voted on.
	    """
	    state = st.session_state
	    # Guard against repeated clicks
	    if state.question_loaded:
	        state.question_loaded = False
	        state.chat_history_a = []
	        state.chat_history_b = []
	        st.error("ボタンを連打しないでください。")
	        print("既に質問と回答を取得しています。")  # log output
	        return
	    # Update the button state
	    state.question_loaded = True
	    st.success("質問と回答を取得しています。しばらくお待ちください。")
	    # Pick the question (ids are 1-based, rows are 0-based)
	    state.question_id = random.randint(1, NUM_QUESTION)
	    state.question = QUESTION_DF["input"][state.question_id - 1]
	    # Pick two distinct models without reordering the shared MODELS list
	    state.model_a, state.model_b = random.sample(MODELS, 2)
	    answer_a = get_answer(state.model_a, state.question_id)
	    answer_b = get_answer(state.model_b, state.question_id)
	    # get_answer() signals failure with the string "Error"
	    if "Error" in (answer_a, answer_b):
	        state.question_loaded = False
	        state.chat_history_a = []
	        state.chat_history_b = []
	        st.error("回答を取得できませんでした。もう一度お試しください。")
	        print("回答を取得できませんでした。")  # log output
	        return
	    # Replace (rather than extend) the histories so a new question never
	    # stacks on top of a previous one.
	    state.chat_history_a = [
	        {"role": "user", "content": state.question},
	        {"role": "assistant", "content": answer_a},
	    ]
	    state.chat_history_b = [
	        {"role": "user", "content": state.question},
	        {"role": "assistant", "content": answer_b},
	    ]
	    st.success("質問と回答を取得しました。回答を選択してください。")
	    print("質問と回答を取得しました。")  # log output

	# Submit the user's vote
	def handle_send_choice(winner):
	    """Button callback: submit the vote for the currently loaded question.

	    Args:
	        winner: The choice to record; forwarded unchanged to ``send_choice``.

	    The vote is marked as sent only after ``send_choice`` succeeds. On
	    failure the session flags are left as they were, so the vote buttons
	    stay enabled and the user can try again.
	    """
	    state = st.session_state
	    # A vote has already been recorded for this session
	    if state.answer_sent:
	        st.error("既に回答を送信しています。")
	        print("既に回答を送信しています。")  # log output
	        return
	    response = send_choice(
	        question_id=state.question_id,
	        model_a=state.model_a,
	        model_b=state.model_b,
	        winner=winner,
	        language="Japanese",
	    )
	    # send_choice() signals failure with the string "Error"
	    if response == "Error":
	        st.error("予期せぬエラーが発生しました。")
	        return
	    state.answer_sent = True
	    st.success("選択肢は正常に送信されました。")
	    # The question has been answered; disable the vote buttons
	    state.question_loaded = False


	# Render one model's column: heading, chat history and, once the vote has
	# been sent, the name of the model that produced the answer.
	def _render_chat_column(heading, chat_history, model_name):
	    st.markdown(heading)
	    if not chat_history:
	        st.markdown("質問を取得してください。")
	        return
	    for message in chat_history:
	        with st.chat_message(message["role"]):
	            st.write(message["content"])
	    # Reveal which model answered only after the vote has been sent
	    if st.session_state.answer_sent:
	        with st.chat_message("assistant"):
	            st.markdown(f"`{model_name}` が回答しました。")


	# Page rendering
	def main():
	    """Render the whole page: intro, arena, vote buttons, leaderboard, citations.

	    The leaderboard is fetched and shown only after the user has submitted
	    a vote in the current session.
	    """
	    # page config
	    st.set_page_config(
	        page_title="日本語チャットボットアリーナ",
	        page_icon="🏆",
	        layout="wide",
	    )

	    # Initialize the session state
	    handle_init_state()
	    # Introduction
	    st.markdown("# 🏆 日本語チャットボットアリーナ")
	    st.markdown("## 📖 説明")
	    # Raw string: "\|" reaches Markdown unchanged without being an invalid
	    # Python escape sequence.
	    st.markdown(r"\| [Twitter](https://twitter.com/yutohub) \| [GitHub](https://github.com/yutohub) \| [ブログ](https://zenn.dev/yutohub) \|")
	    st.markdown("日本語チャットボットアリーナは、日本語に対応しているLLMの評価のためのクラウドソーシングプラットフォームです。[LMSYS Chatbot Arena](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) を参考に、日本語に対応しているLLMのリーダーボードを作成することを目的としています。また、一部の質問と回答は、 [ELYZA-tasks-100](https://huggingface.co/datasets/elyza/ELYZA-tasks-100) や [Northern-System-Service/gpt4-autoeval](https://github.com/Northern-System-Service/gpt4-autoeval) を利用しています。")
	    st.markdown(""" > 注意事項:
	>
	> 日本語チャットボットアリーナが提供する情報によって生じたいかなる損害についても、サービス提供者は一切の責任を負いません。
	> 日本語チャットボットアリーナは開発中であり、予告なく停止または終了する可能性があります。
	> また、ユーザーの回答を収集し、Creative Commons Attribution (CC-BY) または同様のライセンスの下で配布する権利を留保しています。
	""")

	    # Arena: the two anonymous answers side by side
	    st.markdown("## ⚔️ チャットボットアリーナ ⚔️")
	    st.markdown(" 2つの匿名モデル (ChatGPT、Llama など) の回答を見て、より良いモデルに投票してください。")
	    with st.expander(f"🔍 展開するとアリーナに参加している {len(MODELS)} 個のモデルの一覧が表示されます。"):
	        st.write(MODELS)
	    column_a, column_b = st.columns([1, 1])
	    with column_a:
	        _render_chat_column(
	            "### モデル A",
	            st.session_state.chat_history_a,
	            st.session_state.model_a,
	        )
	    with column_b:
	        _render_chat_column(
	            "### モデル B",
	            st.session_state.chat_history_b,
	            st.session_state.model_b,
	        )
	    # Load a question
	    st.button(
	        label="質問を取得",
	        on_click=handle_init_question,
	        # Disabled once a vote was sent or while a question is loaded
	        disabled=st.session_state.answer_sent or st.session_state.question_loaded,
	        type="primary",
	        use_container_width=True,
	    )
	    # Vote buttons: (label, value passed to handle_send_choice)
	    vote_buttons = (
	        ("👈 Aの方が良い", "model_a"),
	        ("👉 Bの方が良い", "model_b"),
	        ("🤝 どちらも良い", "tie"),
	        ("👎 どちらも悪い", "tie (bothbad)"),
	    )
	    vote_columns = st.columns([1] * len(vote_buttons))
	    for column, (label, winner) in zip(vote_columns, vote_buttons):
	        with column:
	            st.button(
	                label=label,
	                on_click=handle_send_choice,
	                args=(winner,),
	                # Voting is possible only while a question is loaded
	                disabled=not st.session_state.question_loaded,
	                use_container_width=True,
	            )

	    # Leaderboard
	    st.markdown("## 🏆 リーダーボード")
	    st.markdown(f"合計で {len(MODELS)} 個のモデルがアリーナに参加しています。30 分毎にリーダーボードが更新されます。")
	    # Shown only after a vote has been sent
	    if st.session_state.answer_sent:
	        leaderboard = create_leaderboard_df()
	        st.dataframe(
	            data=leaderboard,
	            # Table height is derived from the number of models
	            height=(len(MODELS) + 1) * 35 + 3,
	            use_container_width=True,
	            hide_index=True,
	        )
	    else:
	        st.markdown("""
	> まずは、「⚔️ チャットボットアリーナ ⚔️」に回答を送信してください。
	> 回答を送信すると、リーダーボードが表示されます。
	""")

	    # Citations
	    st.markdown("## 📚 引用")
	    st.markdown("""
	```
	@misc{elyzatasks100,
	title={ELYZA-tasks-100: 日本語instructionモデル評価データセット},
	url={https://huggingface.co/elyza/ELYZA-tasks-100},
	author={Akira Sasaki and Masato Hirakawa and Shintaro Horie and Tomoaki Nakamura},
	year={2023},
	}
	```

	[(c) 2023 Northern System Service Co., Ltd.](https://github.com/Northern-System-Service/gpt4-autoeval/blob/main/LICENSE)
	""")


	if __name__ == "__main__":
	    main()