Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build the training set of [corrupted cypher, plain text] pairs from Text.txt.
# Each usable line is stripped of spaces and newlines, upper-cased, cut to its
# first 32 characters, encoded as vocab indices, shifted by NUMERIC_KEY modulo
# 26, and then 1 to 4 positions of the cypher are overwritten with 25 (dash).
# NOTE(review): nesting was reconstructed from a flattened paste; lines shorter
# than 32 characters are assumed to be skipped entirely -- confirm.
corruptedTrain = []  # The structure is [Cypher, Text]
# `with` guarantees the file is closed, even if encoding a line raises.
with open(myDrive + 'Text.txt', 'r') as file:
    for sentence in file:
        sentence = sentence.replace(' ', '').replace('\n', '').upper()
        if len(sentence) < 32:
            continue
        sentence = sentence[:32]  # Use first 32 chars.
        # vocab.index raises ValueError for a character missing from vocab.
        text = torch.tensor([vocab.index(char) for char in sentence])
        cypher = (text + NUMERIC_KEY) % 26  # Perform encryption
        dashNumber = random.randint(1, 4)  # Number of corrupted chars.
        # Only positions that do not already hold 25 may receive a dash, so
        # two dashes never land on the same position.
        # NOTE(review): an encrypted value that is naturally 25 is treated as
        # an existing dash here, exactly as the original check did.
        free = [pos for pos, value in enumerate(cypher.tolist())
                if value != 25]
        # Sampling without replacement replaces the retry loop, which could
        # spin forever when no free position was left.
        for pos in random.sample(free, min(dashNumber, len(free))):
            cypher[pos] = 25
        corruptedTrain.append([cypher, text])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement