# zero-shot-labse / app.py
# grofte's picture
# Added some model limitations
# 16b8a36
import gradio as gr
import scipy
import scipy.spatial.distance
from sentence_transformers import SentenceTransformer
# Heading shown at the top of the Gradio page.
title = "Zero-Shot Text Classification with Hugging Face"
# Gradio interprets the description as markdown or html.
# NOTE: the scores displayed by the app come from scipy's "cosine" metric,
# which is a distance (1 - cosine similarity), so the text describes a
# distance where smaller means more similar.
description = """LaBSE - Language-agnostic BERT Sentence Embedding via sbert.net
Forked from https://huggingface.co/spaces/eldoraboo/zero-shot
LaBSE is trained on data from 109 languages but can also provide embeddings for unseen languages.
In the example below, we use LaBSE to classify a text into one of the given labels.
LaBSE correctly recognizes that "poisson" means fish and that the fish label is the most likely label.
The similarity between the text and the labels is measured using cosine distance (1 - cosine similarity), which depends only on the angle between the embedding vectors.
This means that smaller values indicate a higher similarity.
Once you play around with it you should find that the model is quite brittle.
It is bad at making the connection between "jam" and "marmalade" for example.
It gives same or more weight to adjectives like "red" and "orange" than to nouns like "jam" and "poison".
"""
# Instantiate the LaBSE sentence-embedding model once at module level;
# zero_shot() reuses this shared instance for every call.
model = SentenceTransformer('sentence-transformers/LaBSE')
# Callback used by the interface: turns the two textbox values into table rows.
def zero_shot(doc, candidates):
    """Rank candidate labels by their cosine distance to ``doc``.

    Args:
        doc: The text to classify.
        candidates: Comma-separated candidate labels, e.g. ``"fish, meat"``.
            Whitespace around each label is ignored and empty entries are
            dropped. ``None`` is treated like an empty string.

    Returns:
        A list of ``(label, distance)`` tuples sorted by ascending cosine
        distance (closest label first), with distances rounded to three
        decimals. The list is empty when no usable label was supplied.
    """
    # Split on bare commas and strip, so "a,b", "a, b" and "a ,  b" all
    # yield the same labels; blank entries would only add meaningless rows.
    stripped = (label.strip() for label in (candidates or "").split(","))
    given_labels = [label for label in stripped if label]
    if not given_labels:
        # Nothing to compare against; skip the model call entirely.
        return []

    # Encode the text and all labels in one batch; row 0 is the text.
    embeddings = model.encode([doc] + given_labels)
    query_embedding = embeddings[0]
    candidate_embeddings = embeddings[1:]

    # cdist returns a 1 x len(labels) matrix; take its only row.
    distances = scipy.spatial.distance.cdist(
        [query_embedding], candidate_embeddings, "cosine"
    )[0]

    # Convert numpy scalars to plain floats before rounding for display.
    scored = zip(given_labels, (round(float(d), 3) for d in distances))
    return sorted(scored, key=lambda pair: pair[1])
# Gradio components used by the interface.
# Textbox for the text to classify.
input1 = gr.Textbox(label="Text")
# Textbox for the comma-separated candidate labels.
input2 = gr.Textbox(label="Labels")
# Table with one row per label and its distance to the text.
output = gr.Dataframe(
    headers=["Label", "Distance"],
    type="array",
    label="Output",
)
# Sample (text, labels) pairs offered as clickable examples in the UI.
# Long literals are split with implicit string concatenation; the resulting
# strings are unchanged.
examples = [
    [
        (
            "Eco-friendly Hydrogen: The clean fuel of the future Germany is "
            "promoting the use of #eco-friendly hydrogen in the fight against "
            "climate change. Hydrogen can replace fossil fuels in virtually "
            "every situation, in an engine or fuel cell!"
        ),
        "renewable, politics, emission, temperature, emergency, advertisment",
    ],
    ["I like to eat salmon.", "fish, meat, vegetable"],
    ["poisson", "jam, milk, fish, eggs, poison"],
    [
        "orange jam",
        "red jam, white milk, silver fish, brown eggs, orange poison",
    ],
    [
        (
            "TDC A/S provides communications and entertainment solutions in Denmark. "
            "It operates through Nuuday and TDC NET segments. "
            "The company designs, builds, and operates broadband and mobile networks; "
            "and provides technical support to customers and networks. "
            "It offers services, such as landline voice, TV and streaming, broadband, "
            "Internet and network, mobility, and other services. "
            "The company provides its products and services under the YouSee, Hiper, "
            "Telmore, Blockbuster, TDC Business, TDC Erhverv, Fullrate, NetDesign, "
            "and Relatel brands. "
            "It serves consumer and business customers. "
            "The company was founded in 1882 and is based in Copenhagen, Denmark. "
            "TDC A/S is a subsidiary of DK Telekommunikation ApS."
        ),
        (
            "Diversified Telecommunication Services, "
            "Wireless Telecommunication Services, Media, Entertainment, "
            "Interactive Media and Services"
        ),
    ],
    [
        (
            "Giddy Inc., doing business as Boxed Wholesale, offers online wholesale "
            "and retailing services. "
            "The company provides cleaning and laundry, kitchen, paper, skin care, "
            "hair care, and grocery products. "
            "Additionally, it offers diapers and organic products. "
            "Giddy Inc. was founded in 2013 and is based in Edison, New Jersey."
        ),
        (
            "Food and Staples Retailing, Beverages, Food Products, "
            "Household Products, Personal Products, Tobacco"
        ),
    ],
    [
        (
            "United Iron And Steel Manufacturing Company (P.L.C.) produces and sells "
            "iron and steel products in Jordan. "
            "It is also involved in trading scrap iron. "
            "The company was incorporated in 1992 and is headquartered in Amman, Jordan. "
            "United Iron And Steel Manufacturing Company (P.L.C.) is a subsidiary of "
            "Manaseer Group Corporation."
        ),
        (
            "Chemicals, Construction Materials, Containers and Packaging, "
            "Metals and Mining, Paper and Forest Products"
        ),
    ],
]
# Build the interface: zero_shot() receives the two textboxes as arguments
# and its return value populates the output table.
gui = gr.Interface(
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    title=title,
    description=description,
    examples=examples,
)
# Start serving the interface.
gui.launch()