Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import ast
import os

# Field delimiter shared by movie_lines.txt and movie_conversations.txt.
# Fields are padded with spaces around it, hence the .strip() calls below.
separator = "+++$+++"


def _load_lines(path, encoding):
    """Return a dict mapping each line id (field 0) to its text (field 4)."""
    lines_by_id = {}
    with open(path, "r", encoding=encoding) as movie_lines:
        for raw in movie_lines:
            if not raw.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # maxsplit=4 keeps the utterance whole even if the text itself
            # happens to contain the separator.
            fields = raw.split(separator, 4)
            lines_by_id[fields[0].strip()] = fields[4].strip()
    return lines_by_id


def _parse_conversation(raw):
    """Return the list of line ids stored in field 3 of a conversation row.

    Field 3 is written as a Python list literal such as ``['L1', 'L2']``,
    so it is parsed with ``ast.literal_eval`` rather than by stripping
    brackets and quotes by hand.
    """
    fields = raw.split(separator, 3)
    return ast.literal_eval(fields[3].strip())


def main(data_dir="data", encoding="iso-8859-1"):
    """Write every conversation to its own file of newline-joined lines.

    Reads ``<data_dir>/movie_lines.txt`` and
    ``<data_dir>/movie_conversations.txt`` and writes conversation number
    ``n`` (0-based, in file order) to ``<data_dir>/cornell_lines/<n>.txt``.

    Args:
        data_dir: Directory holding the two input files; the output
            directory ``cornell_lines`` is created inside it if missing.
        encoding: Encoding used for both reading and writing. A single-byte
            encoding such as the default makes the copy byte-transparent.
            NOTE(review): the default assumes the corpus files are not
            valid UTF-8 -- confirm against the actual data.

    Raises:
        KeyError: A conversation references a line id that is not present
            in movie_lines.txt.
        IndexError: An input row has fewer fields than expected.
    """
    movie_lines_file = os.path.join(data_dir, "movie_lines.txt")
    movie_conversations_file = os.path.join(data_dir, "movie_conversations.txt")
    output_dir = os.path.join(data_dir, "cornell_lines")

    line_idxs = _load_lines(movie_lines_file, encoding)

    # The output directory is not part of the input data; create it so the
    # first write does not fail on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    counter = 0
    with open(movie_conversations_file, "r", encoding=encoding) as movie_conversations:
        for raw in movie_conversations:
            if not raw.strip():
                continue
            convo = _parse_conversation(raw)
            out_path = os.path.join(output_dir, "{0}.txt".format(counter))
            # "w" (not "a+"): re-running the script must replace earlier
            # output instead of appending a duplicate conversation.
            with open(out_path, "w", encoding=encoding) as f:
                f.write("\n".join(line_idxs[idx] for idx in convo))
            counter += 1


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement