Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- XARGS_CMD = ("ls ./shards/ | "
- "xargs -n 1 -P {} -I{} "
- "python3 bert/create_pretraining_data.py "
- "--input_file=./shards/{} "
- "--output_file={}/{}.tfrecord "
- "--vocab_file={} "
- "--do_lower_case={} "
- "--max_predictions_per_seq={} "
- "--max_seq_length={} "
- "--masked_lm_prob={} "
- "--random_seed=34 "
- "--dupe_factor=5")
- XARGS_CMD = XARGS_CMD.format(PROCESSES, '{}', '{}', PRETRAINING_DIR, '{}',
- VOC_FNAME, DO_LOWER_CASE,
- MAX_PREDICTIONS, MAX_SEQ_LENGTH, MASKED_LM_PROB)
- tf.gfile.MkDir(PRETRAINING_DIR)
- !$XARGS_CMD
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement