Spaces:

patrol114
/

YetiAI

Sleeping

App Files Files Community

YetiAI / app.py

patrol114

Update app.py

c60e96c verified about 2 months ago

raw

history blame contribute delete

3.02 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	# Single source of truth for the model id, shared by the hosted Inference API
	# client and the locally loaded weights.
	model_path = "01-ai/Yi-Coder-9B-Chat"

	# Remote client for the Hugging Face Inference API.
	client = InferenceClient(model_path)

	# Local tokenizer and model; the model is switched to evaluation mode
	# right after loading.
	tokenizer = AutoTokenizer.from_pretrained(model_path)
	model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
	model.eval()

	def _format_prompt(messages):
	    """Flatten chat messages into the plain ``role: content`` prompt text."""
	    return "\n".join(f"{m['role']}: {m['content']}" for m in messages)


	def _strip_role_label(text):
	    """Trim whitespace and drop one leading ``assistant:`` label, if present."""
	    return text.strip().removeprefix("assistant:").strip()


	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    use_local_model: bool,
	):
	    """Generate a chat reply with the local model or the Inference API.

	    This is a generator: the local path yields the finished reply once, the
	    API path yields the reply accumulated so far after every streamed chunk.

	    Args:
	        message: The newest user message.
	        history: Earlier ``(user, assistant)`` turns; empty entries are skipped.
	        system_message: Text placed first in the prompt as the system turn.
	        max_tokens: Upper bound on newly generated tokens (coerced to ``int``).
	        temperature: Sampling temperature forwarded to the backend.
	        top_p: Nucleus-sampling threshold forwarded to the backend.
	        use_local_model: ``True`` runs the module-level ``model``; ``False``
	            calls the module-level ``client``.

	    Yields:
	        The reply text with surrounding whitespace and one leading
	        ``assistant:`` label removed.
	    """
	    # Assemble the conversation context in chronological order.
	    messages = [{"role": "system", "content": system_message}]
	    for user, assistant in history:
	        if user:
	            messages.append({"role": "user", "content": user})
	        if assistant:
	            messages.append({"role": "assistant", "content": assistant})
	    messages.append({"role": "user", "content": message})

	    prompt = _format_prompt(messages)
	    token_budget = int(max_tokens)

	    if use_local_model:
	        # Local model: tokenize with an attention mask so generate() does not
	        # have to guess it while pad_token_id equals eos_token_id.
	        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
	        prompt_length = encoded["input_ids"].shape[-1]

	        with torch.no_grad():
	            output = model.generate(
	                input_ids=encoded["input_ids"],
	                attention_mask=encoded["attention_mask"],
	                max_new_tokens=token_budget,
	                temperature=temperature,
	                top_p=top_p,
	                do_sample=True,
	                pad_token_id=tokenizer.eos_token_id,
	            )

	        # generate() returns prompt + completion. Decode only the completion so
	        # the prompt is never echoed back and an "assistant:" occurring inside
	        # the answer cannot cut the reply short.
	        completion_ids = output[0][prompt_length:]
	        completion = tokenizer.decode(completion_ids, skip_special_tokens=True)
	        yield _strip_role_label(completion)
	    else:
	        # Hugging Face Inference API: stream chunks and re-yield the text
	        # accumulated so far so the UI can update progressively.
	        response = ""
	        for chunk in client.text_generation(
	            prompt,
	            max_new_tokens=token_budget,
	            stream=True,
	            temperature=temperature,
	            top_p=top_p,
	        ):
	            response += chunk
	            yield _strip_role_label(response)

	# Build the Gradio chat interface. The extra inputs are listed in the same
	# order as respond()'s parameters after (message, history): system_message,
	# max_tokens, temperature, top_p, use_local_model.
	_system_message_box = gr.Textbox(
	    label="Wiadomość systemowa",
	    value="Odpowiadasz w języku polskim. Jesteś Coder/Developer/Programista i tworzysz pełny kod.",
	)
	_max_tokens_slider = gr.Slider(
	    label="Maksymalna liczba nowych tokenów",
	    minimum=1,
	    maximum=2048,
	    step=1,
	    value=2048,
	)
	_temperature_slider = gr.Slider(
	    label="Temperatura",
	    minimum=0.1,
	    maximum=2.0,
	    step=0.1,
	    value=0.7,
	)
	_top_p_slider = gr.Slider(
	    label="Top-p (próbkowanie nucleus)",
	    minimum=0.1,
	    maximum=1.0,
	    step=0.05,
	    value=0.95,
	)
	_local_model_checkbox = gr.Checkbox(value=False, label="Użyj lokalnego modelu")

	demo = gr.ChatInterface(
	    respond,
	    title="Zaawansowany interfejs czatu AI",
	    description="Czatuj z modelem AI, korzystając z Hugging Face Inference API lub lokalnego modelu.",
	    additional_inputs=[
	        _system_message_box,
	        _max_tokens_slider,
	        _temperature_slider,
	        _top_p_slider,
	        _local_model_checkbox,
	    ],
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()