File size: 2,247 Bytes
0509158
4413a8c
0509158
 
9e7b09f
 
0509158
 
 
 
8e1e53d
 
 
916749d
 
0509158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4413a8c
0509158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47b897f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
import spaces
from transformers import pipeline

# Token-classification pipelines, built once at import time so every request
# reuses the same loaded models. "first" aggregation groups sub-word tokens
# into whole-word entities; device=0 places each model on the first GPU.
token_skill_classifier = pipeline(
    model="jjzha/escoxlmr_skill_extraction",
    aggregation_strategy="first",
    device=0,
)
token_knowledge_classifier = pipeline(
    model="jjzha/escoxlmr_knowledge_extraction",
    aggregation_strategy="first",
    device=0,
)


# Sample sentences offered in the demo UI, one per language
# (English, Dutch, Danish, German, French, Italian).
examples = [
    "Knowing Python is a plus",
    "Je hebt aantoonbaar ervaring met sleutelen aan fietsen",
    "Du har en relevant datavidenskabelig, matematisk, økonomisk, ingeniør- eller it-mæssig baggrund",
    "Du besitzt einen Führerschein der Klasse B",
    "Vous aimez les projets de grande envergure et vous savez traiter des données en grande quantité",
    "Per avere successo in questo ruolo, dovrai avere una forte motivazione, una grande determinazione e non necessariamente un'esperienza nel settore.",
]


def aggregate_span(results):
    """Merge entity spans that are separated by exactly one position.

    Two consecutive entries are joined when the later one's ``"start"``
    equals the earlier one's ``"end"`` plus one (i.e. a single separator,
    presumably a space, lies between them). The merged entry keeps every
    field of the first span, except that ``"word"`` becomes the two words
    joined by a single space and ``"end"`` is taken from the last span.

    Args:
        results: Sequence of dicts, each with at least ``"word"``,
            ``"start"`` and ``"end"`` keys, ordered by position.

    Returns:
        A new list of merged span dicts. The input list and its dicts are
        left untouched; an empty input yields an empty list.
    """
    merged = []

    for result in results:
        if merged and result["start"] == merged[-1]["end"] + 1:
            # Adjacent to the span being built: extend it in place.
            previous = merged[-1]
            previous["word"] += " " + result["word"]
            previous["end"] = result["end"]
        else:
            # Start a new span on a shallow copy so the caller's dict is
            # never mutated by later merges.
            merged.append(dict(result))

    return merged


def _relabel(entities, label):
    """Replace each entry's ``"entity_group"`` key with ``"entity": label``."""
    for item in entities:
        if not item.get("entity_group"):
            continue
        item["entity"] = label
        del item["entity_group"]


@spaces.GPU
def ner(text):
    """Extract skill and knowledge spans from *text*.

    Runs both token classifiers over the input, tags their results as
    ``"Skill"`` and ``"Knowledge"`` respectively, and merges neighbouring
    spans with ``aggregate_span``.

    Returns:
        A pair of dicts ``{"text": ..., "entities": [...]}`` — skills first,
        knowledge second — one for each highlight output of the interface.
    """
    skills = token_skill_classifier(text)
    _relabel(skills, "Skill")

    knowledge = token_knowledge_classifier(text)
    _relabel(knowledge, "Knowledge")

    # Only aggregate when something was found; empty results pass through.
    if skills:
        skills = aggregate_span(skills)
    if knowledge:
        knowledge = aggregate_span(knowledge)

    skill_view = {"text": text, "entities": skills}
    knowledge_view = {"text": text, "entities": knowledge}
    return skill_view, knowledge_view


# One text input, two highlighted-text outputs (skills, then knowledge),
# matching the pair returned by `ner`.
demo = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["highlight", "highlight"],
    examples=examples,
)

# Start the web app as soon as the module is executed.
demo.launch()