fix: tokenizer can be `None` in preprocessors
Browse files - processing_aria.py (+5 -1)
processing_aria.py
CHANGED
@@ -18,6 +18,7 @@
|
|
18 |
# under the License.
|
19 |
|
20 |
import inspect
|
|
|
21 |
import re
|
22 |
from typing import List, Optional, Union
|
23 |
|
@@ -34,6 +35,8 @@ from transformers.tokenization_utils import (
|
|
34 |
|
35 |
from .vision_processor import AriaVisionProcessor
|
36 |
|
|
|
|
|
37 |
|
38 |
class AriaProcessor(ProcessorMixin):
|
39 |
"""
|
@@ -73,7 +76,7 @@ class AriaProcessor(ProcessorMixin):
|
|
73 |
else:
|
74 |
self.tokenizer = tokenizer
|
75 |
|
76 |
-
if self.tokenizer.pad_token is None:
|
77 |
self.tokenizer.pad_token = self.tokenizer.unk_token
|
78 |
|
79 |
self.image_token = image_token
|
@@ -229,6 +232,7 @@ class AriaProcessor(ProcessorMixin):
|
|
229 |
**cls._extract_kwargs(AriaVisionProcessor.from_pretrained, **kwargs),
|
230 |
)
|
231 |
if "use_fast" in kwargs:
|
|
|
232 |
kwargs.pop("use_fast")
|
233 |
try:
|
234 |
tokenizer = AutoTokenizer.from_pretrained(
|
|
|
18 |
# under the License.
|
19 |
|
20 |
import inspect
|
21 |
+
import logging
|
22 |
import re
|
23 |
from typing import List, Optional, Union
|
24 |
|
|
|
35 |
|
36 |
from .vision_processor import AriaVisionProcessor
|
37 |
|
38 |
+
logger = logging.getLogger(__name__)
|
39 |
+
|
40 |
|
41 |
class AriaProcessor(ProcessorMixin):
|
42 |
"""
|
|
|
76 |
else:
|
77 |
self.tokenizer = tokenizer
|
78 |
|
79 |
+
if self.tokenizer is not None and self.tokenizer.pad_token is None:
|
80 |
self.tokenizer.pad_token = self.tokenizer.unk_token
|
81 |
|
82 |
self.image_token = image_token
|
|
|
232 |
**cls._extract_kwargs(AriaVisionProcessor.from_pretrained, **kwargs),
|
233 |
)
|
234 |
if "use_fast" in kwargs:
|
235 |
+
logger.warning("use_fast is not supported for AriaProcessor. Ignoring...")
|
236 |
kwargs.pop("use_fast")
|
237 |
try:
|
238 |
tokenizer = AutoTokenizer.from_pretrained(
|