Advertisement
mesearcher

error

Feb 28th, 2024
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.72 KB | None | 0 0
  # This part of the code crashes with a ValueError when the vocabulary file
  # does not exist at the configured path (see the traceback below).
  import os

  # Location of the BERT vocabulary file. The default is the original
  # hard-coded path; set the BERT_VOCAB_PATH environment variable to point at
  # a local copy when running on a machine where that path does not exist.
  vocab_path = os.environ.get('BERT_VOCAB_PATH', '/datasets/ds_bert/vocab.txt')
  tokenizer = transformers.BertTokenizer(vocab_file=vocab_path)

  # Encode every entry of the 'text' column into a list of token ids,
  # with the tokenizer's special tokens added.
  tokenized = df_tweets['text'].apply(
      lambda text: tokenizer.encode(text, add_special_tokens=True))

  # Length of the longest encoded sequence (0 when there are no rows).
  max_len = max(map(len, tokenized.values), default=0)

  # Right-pad each sequence with zeros up to max_len so that all rows have
  # the same length and can be stacked into a single array.
  padded = np.array(
      [ids + [0] * (max_len - len(ids)) for ids in tokenized.values])

  # 1 for every non-zero id in `padded`, 0 for every zero (padding) position.
  attention_mask = np.where(padded != 0, 1, 0)
  16.  
  17.  
  18. # the error (full traceback)
  19. ---------------------------------------------------------------------------
  20. ValueError                                Traceback (most recent call last)
  21. ~\AppData\Local\Temp/ipykernel_1272/3208287495.py in <module>
  22. ----> 1 tokenizer = transformers.BertTokenizer(
  23.       2     vocab_file='/datasets/ds_bert/vocab.txt')
  24.       3
  25.       4 tokenized = df_tweets['text'].apply(
  26.       5     lambda x: tokenizer.encode(x, add_special_tokens=True))
  27.  
  28. C:\work\practicum\practicum_env\lib\site-packages\transformers\models\bert\tokenization_bert.py in __init__(self, vocab_file, do_lower_case, do_basic_tokenize, never_split, unk_token, sep_token, pad_token, cls_token, mask_token, tokenize_chinese_chars, strip_accents, **kwargs)
  29.     192
  30.     193         if not os.path.isfile(vocab_file):
  31. --> 194             raise ValueError(
  32.     195                 f"Can't find a vocabulary file at path '{vocab_file}'. To load the vocabulary from a Google pretrained "
  33.     196                 "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`"
  34.  
  35. ValueError: Can't find a vocabulary file at path '/datasets/ds_bert/vocab.txt'. To load the vocabulary from a Google pretrained model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement