Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Interactive pairwise rating of poem samples.
#
# Usage: bash <this-script> [CORPUS [JSON]]
#   CORPUS  text file of samples to rate
#           (default: the 2019-03-06 GPT-2 poetry sample dump below)
#   JSON    file the rating records are appended to (default: foo.json)
#
# Both arguments are optional; with no arguments the script uses the
# historical hard-coded locations.

# Turn POSIX mode off; this is a no-op unless bash was started in POSIX mode.
set +o posix

JSON="${2:-foo.json}"
CORPUS="${1:-/home/gwern/wiki/docs/ai/2019-03-06-gpt2-poetry-1000samples.txt}"
#######################################
# Encode a piece of text with the external encode2.py script.
# The text is written to a temporary file because encode2.py is invoked
# with a file path; the file is removed again before returning.
# Arguments: $@ - the text to encode (multiple arguments are joined with
#                 single spaces, as `echo "$@"` would)
# Outputs:   whatever encode2.py prints, on stdout
# Returns:   non-zero if the temp file cannot be created or encode2.py fails
#######################################
encode() {
  local tmp_file encoded status=0

  tmp_file=$(mktemp /tmp/encode-XXXXXX.txt) || return

  # printf instead of echo: text such as "-n" or "-e" must be written
  # literally, not swallowed as an echo option.
  printf '%s\n' "$*" > "$tmp_file"

  encoded=$(PYTHONPATH=src python encode2.py \
    --model_name 2019-03-06-gwern-gpt2-poetry-projectgutenberg-network519407 \
    "$tmp_file") || status=$?

  # Clean up on both the success and the failure path.
  rm -f -- "$tmp_file"

  printf '%s\n' "$encoded"
  return "$status"
}
#######################################
# Append one comparison record to the results file.
# Globals:   JSON (read) - path of the file the record is appended to
# Arguments: $1 - value stored under "best"
#            $2 - value stored under "sample0"
#            $3 - value stored under "sample1"
#            All three are inserted verbatim, so they must already be
#            valid JSON values.
# Outputs:   one line of the form `{...},` appended to $JSON; the trailing
#            comma separates the record from whatever is written next
# Returns:   0 on success; 1 (nothing written) if any argument is empty,
#            since an empty field would produce a malformed record
#######################################
generateJson() {
  local best=$1 sample0=$2 sample1=$3

  if [[ -z "$best" || -z "$sample0" || -z "$sample1" ]]; then
    printf 'generateJson: refusing to write a record with an empty field\n' >&2
    return 1
  fi

  # "$JSON" is quoted so that paths containing whitespace work.
  printf '{"query": [], "sample0": %s, "sample1": %s, "best": %s},\n' \
    "$sample0" "$sample1" "$best" >> "$JSON"
}
# ---------------------------------------------------------------------------
# Interactive rating session.
#
# The corpus is cut into 60-line chunks; for each chunk the first 30 lines
# and the remaining lines are shown as two candidates and the user picks
# the better one with a single key press:
#   1        -> first half wins  (recorded as "best": 0)
#   2        -> second half wins (recorded as "best": 1)
#   any other key (e.g. Enter) -> skip the pair
# Rated pairs are appended to $JSON as a JSON array of records.
# ---------------------------------------------------------------------------

SAMPLE_DIR=/tmp/poem-samples

if [[ ! -r "$CORPUS" ]]; then
  printf 'cannot read corpus: %s\n' "$CORPUS" >&2
  exit 1
fi

# Start from an empty scratch directory so chunks left over from an earlier
# run are not offered again.
rm -rf -- "$SAMPLE_DIR"
mkdir -p -- "$SAMPLE_DIR" || exit 1

# Drop every line containing '=' (presumably the separator lines between
# samples -- confirm against the corpus format), then split into chunks.
grep -F -v -- '=' "$CORPUS" | split --lines=60 - "$SAMPLE_DIR/sample-"

# NOTE: $JSON is opened in append mode on purpose so that ratings from an
# earlier session are never destroyed; rerunning therefore leaves several
# arrays in the file that have to be merged by hand.
echo "[" >> "$JSON"

for POEM in "$SAMPLE_DIR"/sample-*; do
  # An unmatched glob stays literal; skip it instead of reading a bogus path.
  [[ -e "$POEM" ]] || continue

  FIRST=$(head -n 30 -- "$POEM")
  # Lines 31.. rather than "the last 30 lines": identical for a full
  # 60-line chunk, but never overlaps FIRST when the final chunk is short.
  SECOND=$(tail -n +31 -- "$POEM")
  # A chunk too short to have a second half offers nothing to compare.
  [[ -n "$SECOND" ]] || continue

  printf '%s\n' "$FIRST"
  echo "============================================="
  printf '%s\n' "$SECOND"
  echo "" # print a newline to make output easier to read and divide from the foregoing
  echo "1: First wins| 2: Second wins"

  # Stop (and still close the array below) once stdin is exhausted.
  read -r -N 1 RATING || break

  case "$RATING" in
    1) BEST=0 ;;
    2) BEST=1 ;;
    *) continue ;; # newline or any other key: skip this pair
  esac

  # Encode only pairs that were actually rated; the encoder is an external
  # program and there is no point running it for skipped pairs.
  FIRST_ENCODED=$(encode "$FIRST")
  SECOND_ENCODED=$(encode "$SECOND")
  generateJson "$BEST" "$FIRST_ENCODED" "$SECOND_ENCODED"
done

# Every record line ends in "},"; remove the comma from the last one so the
# closing bracket yields valid JSON. No-op when no record was written.
sed -i '$s/},$/}/' "$JSON"
echo "]" >> "$JSON"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement