metadata
tags:
- spacy
- token-classification
language:
- tl
license: mit
library_name: spacy
pipeline_tag: token-classification
model-index:
- name: Large-sized calamanCy pipeline by L.J. Miranda
  results:
  - task:
      type: token-classification
      name: Named Entity Recognition
    dataset:
      type: tlunified-ner
      name: TLUnified-NER
      split: test
      revision: 3f7dab9d232414ec6204f8d6934b9a35f90a254f
    metrics:
    - type: f1
      value: 0.889
      name: F1
datasets:
- ljvmiranda921/tlunified-ner
calamanCy: Tagalog NLP pipelines in spaCy
Paper: https://arxiv.org/abs/2311.07171
Feature | Description |
---|---|
Name | tl_calamancy_lg |
Version | 0.1.0 |
spaCy | >=3.5.0,<4.0.0 |
Default Pipeline | tok2vec , tagger , morphologizer , parser , ner |
Components | tok2vec , tagger , morphologizer , parser , ner |
Vectors | 714435 keys, 714435 unique vectors (300 dimensions) |
Sources | TLUnified dataset (Jan Christian Blaise Cruz and Charibeth Cheng)<br />UD_Tagalog-TRG (Stephanie Samson, Daniel Zeman, and Mary Ann C. Tan)<br />UD_Tagalog-Ugnayan (Angelina Aquino) |
License | MIT |
Author | Lester James V. Miranda |
Label Scheme
View label scheme (120 labels for 4 components)
Component | Labels |
---|---|
tagger | `ADJ`, `ADJ_PART`, `ADP`, `ADV`, `ADV_PART`, `AUX`, `CCONJ`, `DET`, `DET_ADP`, `DET_PART`, `INTJ`, `NOUN`, `NOUN_PART`, `NUM`, `NUM_PART`, `PART`, `PRON`, `PRON_PART`, `PROPN`, `PUNCT`, `SCONJ`, `VERB`, `VERB_PART` |
morphologizer | `Aspect=Perf\|Mood=Ind\|POS=VERB\|Voice=Act`, `Case=Nom\|POS=ADP`, `POS=NOUN`, `POS=PUNCT`, `Aspect=Perf\|Mood=Ind\|POS=VERB\|Voice=Pass`, `Case=Gen\|POS=ADP`, `Case=Gen\|Number=Sing\|POS=PRON\|Person=1\|PronType=Prs`, `Aspect=Imp\|Mood=Ind\|POS=VERB\|Voice=Act`, `POS=ADV\|PronType=Dem`, `Foreign=Yes\|POS=NOUN`, `Degree=Pos\|POS=ADJ`, `Case=Nom\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Case=Nom\|Deixis=Med\|Number=Sing\|POS=PRON\|PronType=Dem`, `Gender=Masc\|POS=PROPN`, `Case=Gen\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Degree=Pos\|Link=Yes\|POS=ADJ`, `POS=ADP`, `Case=Dat\|POS=ADP`, `POS=VERB\|Polarity=Pos`, `Aspect=Hab\|POS=VERB`, `POS=SCONJ`, `Case=Nom\|Number=Sing\|POS=PRON\|Person=1\|PronType=Prs`, `Aspect=Prosp\|Mood=Ind\|POS=VERB\|Voice=Act`, `POS=ADV`, `POS=PART\|Polarity=Neg`, `Aspect=Imp\|Mood=Ind\|POS=VERB\|Voice=Pass`, `Aspect=Perf\|Mood=Ind\|POS=VERB\|Voice=Lfoc`, `POS=PROPN`, `Case=Nom\|Deixis=Prox\|Number=Sing\|POS=PRON\|PronType=Dem`, `Gender=Masc\|POS=NOUN`, `Gender=Fem\|POS=NOUN`, `Degree=Pos\|Gender=Fem\|POS=ADJ`, `Gender=Fem\|POS=PROPN`, `Case=Nom\|Clusivity=In\|Number=Dual\|POS=PRON\|Person=1\|PronType=Prs`, `Number=Plur\|POS=DET\|PronType=Ind`, `Case=Nom\|Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `POS=PRON\|PronType=Prs\|Reflex=Yes`, `Gender=Masc\|POS=DET\|PronType=Emp`, `Case=Nom\|POS=PRON\|PronType=Int`, `Link=Yes\|POS=NOUN`, `POS=PART\|PartType=Int`, `POS=INTJ\|Polarity=Pos`, `Link=Yes\|POS=PART\|PartType=Int`, `POS=VERB\|Polarity=Neg`, `Degree=Pos\|POS=ADJ\|PronType=Int`, `Case=Gen\|Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `Aspect=Perf\|Mood=Ind\|POS=VERB\|PronType=Int\|Voice=Act`, `Case=Nom\|Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `Aspect=Perf\|Mood=Ind\|POS=VERB\|PronType=Int\|Voice=Pass`, `Aspect=Perf\|Mood=Ind\|POS=VERB\|Voice=Ifoc`, `POS=ADV\|PronType=Int`, `Aspect=Prog\|Mood=Ind\|POS=VERB\|Voice=Act`, `POS=PART\|PartType=Nfh`, `Deixis=Remt\|POS=ADV\|PronType=Dem`, `Aspect=Imp\|Mood=Pot\|POS=VERB\|Voice=Act`, `Link=Yes\|POS=VERB\|Polarity=Pos`, `Link=Yes\|POS=VERB\|Polarity=Neg`, `POS=PART\|PartType=Des`, `Mood=Imp\|POS=AUX\|Polarity=Neg`, `Case=Nom\|Link=Yes\|Number=Plur\|POS=PRON\|Person=2\|PronType=Prs`, `Case=Nom\|Link=Yes\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Aspect=Prog\|Mood=Ind\|POS=VERB\|Voice=Pass`, `Aspect=Prog\|Mood=Ind\|POS=VERB\|Voice=Lfoc`, `Aspect=Prog\|Mood=Ind\|POS=VERB\|Voice=Bfoc`, `POS=DET\|PronType=Tot`, `Case=Dat\|Link=Yes\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Link=Yes\|POS=PRON\|PronType=Prs\|Reflex=Yes`, `Mood=Imp\|POS=VERB\|Voice=Act`, `Case=Dat\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Mood=Imp\|POS=VERB\|Voice=Lfoc`, `Case=Gen\|Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `Mood=Imp\|POS=VERB\|Voice=Pass`, `Case=Gen\|Clusivity=In\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Aspect=Hab\|POS=VERB\|Voice=Pass`, `Gender=Masc\|Link=Yes\|POS=PROPN`, `Case=Gen\|Link=Yes\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Case=Gen\|Link=Yes\|Number=Sing\|POS=PRON\|Person=1\|PronType=Prs`, `POS=ADJ`, `POS=PART`, `POS=PRON`, `POS=VERB`, `POS=INTJ`, `POS=CCONJ`, `POS=NUM`, `POS=DET` |
parser | `ROOT`, `advmod`, `case`, `dep`, `nmod`, `nsubj`, `obj`, `obl`, `punct` |
ner | `LOC`, `ORG`, `PER` |
Citation
@inproceedings{miranda-2023-calamancy,
title = "calaman{C}y: A {T}agalog Natural Language Processing Toolkit",
author = "Miranda, Lester James",
booktitle = "Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023)",
month = dec,
year = "2023",
address = "Singapore, Singapore",
publisher = "Empirical Methods in Natural Language Processing",
url = "https://aclanthology.org/2023.nlposs-1.1",
pages = "1--7",
}