Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # эта часть кода падает с ошибкой
- tokenizer = transformers.BertTokenizer(
- vocab_file='/datasets/ds_bert/vocab.txt')
- tokenized = df_tweets['text'].apply(
- lambda x: tokenizer.encode(x, add_special_tokens=True))
- max_len = 0
- for i in tokenized.values:
- if len(i) > max_len:
- max_len = len(i)
- padded = np.array([i + [0]*(max_len - len(i)) for i in tokenized.values])
- attention_mask = np.where(padded != 0, 1, 0)
- # ошибка
- ---------------------------------------------------------------------------
- ValueError Traceback (most recent call last)
- ~\AppData\Local\Temp/ipykernel_1272/3208287495.py in <module>
- ----> 1 tokenizer = transformers.BertTokenizer(
- 2 vocab_file='/datasets/ds_bert/vocab.txt')
- 3
- 4 tokenized = df_tweets['text'].apply(
- 5 lambda x: tokenizer.encode(x, add_special_tokens=True))
- C:\work\practicum\practicum_env\lib\site-packages\transformers\models\bert\tokenization_bert.py in __init__(self, vocab_file, do_lower_case, do_basic_tokenize, never_split, unk_token, sep_token, pad_token, cls_token, mask_token, tokenize_chinese_chars, strip_accents, **kwargs)
- 192
- 193 if not os.path.isfile(vocab_file):
- --> 194 raise ValueError(
- 195 f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
- 196 "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
- ValueError: Can't find a vocabulary file at path '/datasets/ds_bert/vocab.txt'. To load the vocabulary from a Google pretrained model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement