Spaces:

ttt-tkmr
/

QuickRecap

Sleeping

File size: 7,595 Bytes

# Sample summary (Japanese Markdown) used as the initial value of the output
# Chatbot in both tabs, so users can see the expected layout of a summary
# before submitting a request.
demo_markdown = """要約の出力イメージ
## 会議要約

**会議の概要**

- 会議名: test
- 日時: 2xxx年x月x日
- 参加者: Aさん、Bさん、Cさん
- 議題: 要約の精度確認、今後の開発

**議論された内容**

1. 要約に用いる技術
    - AIモデルの一覧
      - xxx
      - xxx
2. ツールとして必要な機能
    - すでに実装されている機能
      - xxx
      - xxx
    - 開発が望まれる機能
      - xxx
3. 今後の開発プラン
    - 短期計画: xxx
    - 長期計画: xxx

**結論**

- xxx

**懸念点**

- xxx

**今後のアクションアイテム**

- xxx

## 会議の流れ
| タイムスタンプ | 話題 | 備考 |
| --- | --- | --- |
| 0:00 | 要約に用いる技術 | xxx |
| 0:58 | xxx | xxx |
| 2:30 | xxx | xxx |
"""


import os
import time

import gradio as gr
import google.generativeai as genai
import logging
# Configure the root logger to emit records at DEBUG level and above.
logging.basicConfig(level=logging.DEBUG)


# Sampling parameters passed to every GenerativeModel created by this app.
generation_config = dict(
    temperature=0,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# One entry per harm category, all sharing the same blocking threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

def model_settings(api_key, system_prompt, model):
    """Configure the Gemini client and open a fresh chat session.

    Args:
        api_key: API key handed to ``genai.configure``.
        system_prompt: Text used as the model's system instruction.
        model: Model name without the ``models/`` prefix.

    Returns:
        A chat session started with an empty history.
    """
    genai.configure(api_key=api_key)

    qualified_name = "models/" + model
    generative_model = genai.GenerativeModel(
        model_name=qualified_name,
        safety_settings=safety_settings,
        generation_config=generation_config,
        system_instruction=system_prompt,
    )
    return generative_model.start_chat(history=[])


def _meeting_system_prompt(media, recording, meeting_name, members, point, keyword):
  """Render the Japanese system prompt shared by the audio and video tabs.

  Args:
    media: Word for the input medium inserted into the prompt (e.g. 音声, 動画).
    recording: Word for the kind of recording (e.g. 録音, 録画).
    meeting_name: Name of the meeting.
    members: Participants, as entered by the user (one per line).
    point: Extra items the summary should extract.
    keyword: Meeting-specific terms / proper nouns.

  Returns:
    The prompt text. It starts with a newline and every template line keeps
    its two-space indent, exactly as the original per-medium templates did.
  """
  return f"""
  この{media}は、{meeting_name}という会議の{recording}です。
  会議の参加者は、
  {members}
  です。
  この{media}をもとに会議の要約を作成してください。
  {media}に含まれていない情報を記載したり、冗長になったりしてはいけません。
  この会議固有の言葉は、{keyword}です。

  まず、会議の概要、何が議論されたのか、会議の結論、アクションアイテム、{point}を冒頭にまとめて記載してください。
  会議の概要には、会議名、日時、参加者を記載してください。開催日時などが不明の場合は「不明」と記載してください。
  また内容は漏れなく、重複のない形としてください。

  その後、会議の流れを記載します。
  会議の流れは下記の形式で、それぞれのタイムスタンプごとの話題を記載します。
  具体的な内容は整理して、概要に記載してください。表の中はすべて左詰となるようにします。
  | タイムスタンプ | 話題 | 概要 |
  """


def audio_system_prompt(meeting_name, members, point, keyword):
  """Return the system prompt for summarising an audio recording of a meeting."""
  return _meeting_system_prompt("音声", "録音", meeting_name, members, point, keyword)


def video_system_prompt(meeting_name, members, point, keyword):
  """Return the system prompt for summarising a video recording of a meeting."""
  return _meeting_system_prompt("動画", "録画", meeting_name, members, point, keyword)

def _wait_until_processed(uploaded, poll_interval=5, timeout=600):
    """Poll the Gemini Files API until *uploaded* has left the PROCESSING state.

    Args:
        uploaded: File object returned by ``genai.upload_file``.
        poll_interval: Seconds to sleep between state checks.
        timeout: Maximum number of seconds to wait in total.

    Returns:
        The refreshed file object once it is no longer PROCESSING.

    Raises:
        gr.Error: If processing fails or does not finish within *timeout*.
    """
    deadline = time.monotonic() + timeout
    while uploaded.state.name == "PROCESSING":
        if time.monotonic() >= deadline:
            raise gr.Error("Timed out while waiting for the uploaded file to be processed.")
        time.sleep(poll_interval)
        uploaded = genai.get_file(uploaded.name)
    if uploaded.state.name == "FAILED":
        raise gr.Error("The uploaded file could not be processed.")
    return uploaded


def gemini(api_key, input_file, meeting_name, members, point, keyword, model, task):
    """Upload a meeting recording to Gemini and return its summary.

    Args:
        api_key: Google AI Studio API key entered by the user.
        input_file: Path of the uploaded audio/video file, or None.
        meeting_name: Name of the meeting.
        members: Participants, one per line.
        point: Extra items the summary should extract.
        keyword: Meeting-specific terms / proper nouns.
        model: Gemini model name without the ``models/`` prefix.
        task: "audio" selects the audio prompt; anything else the video prompt.

    Returns:
        A one-row chat history ``[[None, summary_text]]`` for the Chatbot output.

    Raises:
        gr.Error: If the API key or input file is missing, or if the uploaded
            file cannot be processed in time.
    """
    # An empty textbox yields "" rather than None, so test for blank as well.
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("No api_key submitted!")
    if input_file is None:
        raise gr.Error("No input_file submitted! Please upload or record an file before submitting your request.")

    # The client must be configured before the upload; otherwise the upload
    # is attempted without the key the user just submitted.
    genai.configure(api_key=api_key)

    logging.warning("input file: %s", input_file)
    upload_file = genai.upload_file(input_file)
    try:
        logging.warning("upload file: %s", upload_file)

        # Wait for server-side processing instead of sleeping a fixed time.
        upload_file = _wait_until_processed(upload_file)

        # audio or video
        if task == "audio":
            system_prompt = audio_system_prompt(meeting_name, members, point, keyword)
        else:
            system_prompt = video_system_prompt(meeting_name, members, point, keyword)

        # Run the summarisation.
        chat_session = model_settings(api_key, system_prompt, model)
        response = chat_session.send_message([upload_file])
        summary = response.text
    finally:
        # Always remove the remote copy, even when the request fails.
        upload_file.delete()

    return [[None, summary]]


def _build_interface(media_input, default_model, task):
    """Build one QuickRecap tab around ``gemini`` for a given media input.

    Args:
        media_input: Gradio component that supplies the recording.
        default_model: Model name pre-selected in the model radio.
        task: Value forwarded to ``gemini`` through a hidden textbox
            ("audio" or "video").

    Returns:
        The configured ``gr.Interface``.
    """
    return gr.Interface(
        fn=gemini,
        # Order must match the positional parameters of ``gemini``.
        inputs=[
            # Masked so the key is not shown in clear text on screen.
            gr.Textbox(label="google ai studioのAPI key", show_label=True, placeholder="不明", type="password"),
            media_input,
            gr.Textbox(label="会議名", show_label=True, placeholder="不明"),
            gr.TextArea(label="参加者： 1名ずつ改行して入力", show_label=True, placeholder="不明"),
            gr.Textbox(label="抽出事項：　概要、何が議論されたのか、会議の結論、アクションアイテム以外に抽出したいこと", show_label=True),
            gr.Textbox(label="固有名詞：　企業名など", show_label=True),
            gr.Radio(["gemini-1.5-flash-latest", "gemini-1.5-pro-latest"], label="使用モデル", show_label=True, value=default_model),
            gr.Textbox(value=task, visible=False),
        ],
        outputs=[gr.Chatbot(show_copy_button=True, render_markdown=True, height=1000, value=[[None, demo_markdown]])],
        title="📝 QuickRecap v1",
        description="↑select source.  It automatically summarizes meeting audio and video.",
        allow_flagging="never",
    )


audio = _build_interface(
    gr.Audio(type="filepath", label="Audio file"),
    default_model="gemini-1.5-flash-latest",
    task="audio",
)

video = _build_interface(
    gr.Video(label="Video file", show_download_button=True),
    default_model="gemini-1.5-pro-latest",
    task="video",
)


# Top-level app: the audio and video interfaces shown as two tabs.
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [audio, video],
        ["🔈Audio", "🎥Video"],
    )

demo.launch()