huseinzol05
committed on
Commit
•
223aa52
1
Parent(s):
6e72ee3
fix readme
Browse files- README.md +7 -7
- convert-from-malaya.ipynb +0 -7
README.md
CHANGED
@@ -2,13 +2,13 @@
|
|
2 |
language: ms
|
3 |
---
|
4 |
|
5 |
-
# t5-small-bahasa-cased
|
6 |
|
7 |
-
Pretrained T5 small language model for Malay.
|
8 |
|
9 |
## Pretraining Corpus
|
10 |
|
11 |
-
`t5-small-bahasa-cased` model was pretrained on multiple tasks. Below is list of tasks we trained on,
|
12 |
|
13 |
1. Language masking task on bahasa news, bahasa Wikipedia, bahasa Academia.edu, bahasa parliament and translated The Pile.
|
14 |
2. News title prediction on bahasa news.
|
@@ -35,8 +35,8 @@ You can use this model by installing `torch` or `tensorflow` and Huggingface lib
|
|
35 |
```python
|
36 |
from transformers import T5Tokenizer, T5Model
|
37 |
|
38 |
-
model = T5Model.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
|
39 |
-
tokenizer = T5Tokenizer.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
|
40 |
```
|
41 |
|
42 |
## Example using T5ForConditionalGeneration
|
@@ -44,8 +44,8 @@ tokenizer = T5Tokenizer.from_pretrained('malay-huggingface/t5-small-bahasa-cased
|
|
44 |
```python
|
45 |
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
46 |
|
47 |
-
tokenizer = T5Tokenizer.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
|
48 |
-
model = T5ForConditionalGeneration.from_pretrained('malay-huggingface/t5-small-bahasa-cased')
|
49 |
input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')
|
50 |
outputs = model.generate(input_ids)
|
51 |
print(tokenizer.decode(outputs[0]))
|
|
|
2 |
language: ms
|
3 |
---
|
4 |
|
5 |
+
# t5-tiny-bahasa-cased
|
6 |
|
7 |
+
Pretrained T5 tiny language model for Malay.
|
8 |
|
9 |
## Pretraining Corpus
|
10 |
|
11 |
+
`t5-tiny-bahasa-cased` model was pretrained on multiple tasks. Below is list of tasks we trained on,
|
12 |
|
13 |
1. Language masking task on bahasa news, bahasa Wikipedia, bahasa Academia.edu, bahasa parliament and translated The Pile.
|
14 |
2. News title prediction on bahasa news.
|
|
|
35 |
```python
|
36 |
from transformers import T5Tokenizer, T5Model
|
37 |
|
38 |
+
model = T5Model.from_pretrained('malay-huggingface/t5-tiny-bahasa-cased')
|
39 |
+
tokenizer = T5Tokenizer.from_pretrained('malay-huggingface/t5-tiny-bahasa-cased')
|
40 |
```
|
41 |
|
42 |
## Example using T5ForConditionalGeneration
|
|
|
44 |
```python
|
45 |
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
46 |
|
47 |
+
tokenizer = T5Tokenizer.from_pretrained('malay-huggingface/t5-tiny-bahasa-cased')
|
48 |
+
model = T5ForConditionalGeneration.from_pretrained('malay-huggingface/t5-tiny-bahasa-cased')
|
49 |
input_ids = tokenizer.encode('soalan: siapakah perdana menteri malaysia?', return_tensors = 'pt')
|
50 |
outputs = model.generate(input_ids)
|
51 |
print(tokenizer.decode(outputs[0]))
|
convert-from-malaya.ipynb
CHANGED
@@ -596,13 +596,6 @@
|
|
596 |
"source": [
|
597 |
"!rm -rf t5-tiny-v2"
|
598 |
]
|
599 |
-
},
|
600 |
-
{
|
601 |
-
"cell_type": "code",
|
602 |
-
"execution_count": null,
|
603 |
-
"metadata": {},
|
604 |
-
"outputs": [],
|
605 |
-
"source": []
|
606 |
}
|
607 |
],
|
608 |
"metadata": {
|
|
|
596 |
"source": [
|
597 |
"!rm -rf t5-tiny-v2"
|
598 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
599 |
}
|
600 |
],
|
601 |
"metadata": {
|