import json
from datetime import datetime
from importlib import reload  # "imp" is deprecated; importlib provides reload
import tensorflow as tf
import nltk
from nltk.corpus import wordnet
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.stem import WordNetLemmatizer
from nltk.corpus.reader.wordnet import WordNetError
# import multiprocessing as mp
from threading import Thread
import sys
import re
import os
# Note: the TensorFlow/NLTK imports are not used in this merge step.

# Directory containing the scraped per-article JSON files
file = "/home/suthagar/Desktop/scrapy/CS4642-DailyMirror-Tech/outputs/"
all_files = os.listdir(file)

fullData = []
i = 0
if len(all_files) > 0:
    for inputFileName in all_files:
        # Each scraped file holds a single JSON document on its first line
        with open(file + inputFileName, "r") as inputFile:
            corpusLines = inputFile.readlines()
            # print(corpusLines[0])
            jsonData = json.loads(corpusLines[0])
            fullData.append(jsonData)
            # fullData += corpusLines
        print(inputFileName)
        # Uncomment to stop after a few files while debugging
        # i += 1
        # if i > 2:
        #     break

# Write all parsed records out as one JSON array alongside the inputs
with open(file + "output-all.json", "w") as f:
    f.write(json.dumps(fullData))
print("completed")