T5-base-ddg / README.md
Vijayendra's picture
Update README.md
99ec6fb verified
metadata
license: mit
language:
  - en
base_model:
  - google-t5/t5-base
datasets:
  - li2017dailydialog/daily_dialog
metrics:
  - rouge

T5-base-ddg

This model is a fine-tuned version of T5 for open-ended dialog generation. It was fine-tuned on the Daily Dialog dataset for 35 epochs using cyclic attention and a custom loss.

Model Usage

Below is an example of how to load and use this model for dialog response generation:

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Set the device (use GPU if available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the model and tokenizer from Hugging Face
tokenizer = T5Tokenizer.from_pretrained("Vijayendra/T5-base-ddg")
model = T5ForConditionalGeneration.from_pretrained("Vijayendra/T5-base-ddg").to(device)

# Put the model in evaluation mode once, before the loop: the call is
# loop-invariant, so there is no reason to repeat it for every prompt.
model.eval()

# Define your prompts
input_prompts = [
    "I am having a bad day at work",
    "What should I do about my stress?",
    "How can I improve my productivity?",
    "I'm feeling very anxious today",
    "What is the best way to learn new skills?",
    "How do I deal with failure?",
    "What do you think about the future of technology?",
    "I want to improve my communication skills",
    "How can I stay motivated at work?",
    "What is the meaning of life?",
]

# Generate one response per prompt; results are keyed by the prompt text
generated_responses = {}
for prompt in input_prompts:
    # Encode a single prompt, truncated/padded to 40 tokens, on the target device
    inputs = tokenizer(prompt, return_tensors="pt", max_length=40, truncation=True, padding="max_length").to(device)

    # Gradients are not needed for generation
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_length=100,
            num_beams=7,
            repetition_penalty=2.5,
            length_penalty=2.0,
            early_stopping=True
        )

    # Decode the first returned sequence into plain text
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    generated_responses[prompt] = generated_text

# Display the input prompts and the generated responses
for prompt, response in generated_responses.items():
    print(f"Prompt: {prompt}")
    print(f"Response: {response}\n")





from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
import torch

# Pick the compute device: CUDA when it is available, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Both the tokenizer and the model weights come from the same checkpoint
checkpoint = "Vijayendra/T5-base-ddg"
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
model = T5ForConditionalGeneration.from_pretrained(checkpoint)
model = model.to(device)

# Test split of the dialog dataset; substitute your own dataset name if needed
dataset = load_dataset('daily_dialog', split='test')

# Generate responses for dialogs taken from the test set (50 by default)
def generate_responses(dataset, num_responses=50):
    """Generate a model reply for the opening utterance of each dialog.

    For every record the first entry of ``data['dialog']`` is used as the
    prompt and the second entry as the reference reply. Records whose dialog
    has fewer than two utterances are skipped, since they provide no
    reference to compare against.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        dataset: Iterable of records, each exposing a ``'dialog'`` sequence
            of utterance strings.
        num_responses: Maximum number of entries to produce. A value of zero
            or less yields an empty list.

    Returns:
        A list of dicts with the keys ``"Input Prompt"``,
        ``"Generated Response"`` and ``"Reference Response"``.
    """
    responses = []
    if num_responses <= 0:
        return responses

    # Evaluation mode only needs to be set once, not on every iteration.
    model.eval()

    for data in dataset:
        dialog = data['dialog']
        if len(dialog) < 2:
            # No second utterance to serve as the reference response.
            continue

        # First utterance is the prompt, second is the expected reply.
        input_text = dialog[0]
        reference_text = dialog[1]

        # Tokenize the prompt (truncated/padded to 40 tokens) and generate
        inputs = tokenizer(input_text, return_tensors="pt", max_length=40, truncation=True, padding="max_length").to(device)
        with torch.no_grad():
            generated_ids = model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=40,
                num_beams=7,
                repetition_penalty=2.5,
                length_penalty=2.0,
                early_stopping=True
            )

        # Decode the first returned sequence into plain text
        generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

        # Record prompt, generated reply, and reference side by side
        responses.append({
            "Input Prompt": input_text,
            "Generated Response": generated_text,
            "Reference Response": reference_text
        })

        # Stop as soon as the requested number of entries has been collected
        if len(responses) >= num_responses:
            break

    return responses

# Run generation over the loaded dataset using the default sample count
responses = generate_responses(dataset)

# Show each prompt alongside the model output and the reference reply
for number, entry in enumerate(responses, start=1):
    print(f"Prompt {number}: {entry['Input Prompt']}")
    print(f"T5 Model Response: {entry['Generated Response']}")
    print(f"Reference Response: {entry['Reference Response']}\n")