Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Obtain the latest Wikidata JSON dump — either by downloading it from
# dumps.wikimedia.org ('download') or by copying the dump published on the
# public NFS share ('read') — then iterate its entities one per line.
#
# NOTE(review): relies on names defined elsewhere in this file:
# `requests`, `shutil`, `gzip`, `json`, `dumps_path`, `databases_path`.
option = 'read'

if option == 'download':
    print('* Downloading the latest Wikidata dump.')
    # Dated dumps also available, e.g.
    # https://dumps.wikimedia.org/wikidatawiki/entities/20180212/
    url = "https://dumps.wikimedia.org/wikidatawiki/entities/latest-all.json.gz"
    local_filename = url.split('/')[-1]
    # stream=True: the dump is tens of GB, so write it out in chunks
    # instead of loading the whole response body into memory.
    r = requests.get(url, stream=True)
    with open(dumps_path + local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=10240):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
    read_dump = databases_path + local_filename

if option == 'read':
    # Read the dump straight from the public share.  8 hours to process
    # the 2% when read from the other server; reading the dump appears to
    # be slower than downloading it.
    read_dump = '/public/dumps/public/wikidatawiki/entities/latest-all.json.gz'
    # BUGFIX: the original derived local_filename from `url` and copied
    # `filename` — both undefined in this branch. Use `read_dump` for both.
    local_filename = read_dump.split('/')[-1]
    try:
        shutil.copyfile(read_dump, dumps_path + local_filename)
        print('Wikidata Dump copied.')
    except OSError:  # was a bare except; copy failure is best-effort
        print('Not possible to copy the wikidata dump.')

n_qitems = 85696352  # approximate number of entities in the dump

print('Iterating the dump.')
entity_count = 0  # renamed from `iter`, which shadowed the builtin
# Text mode ('rt') yields str lines directly, avoiding the original's
# bytes-vs-str loop guard bug; `with` guarantees the gzip handle closes.
with gzip.open(read_dump, 'rt', encoding='utf-8') as dump_in:
    for raw_line in dump_in:
        entity_count += 1
        # Each entity line ends with ','; strip it (and trailing newline).
        line = raw_line.rstrip()[:-1]
        if not line:
            # The '[' and ']' array-delimiter lines reduce to '' here;
            # skip them instead of logging a spurious JSON error.
            continue
        try:
            entity = json.loads(line)
            qitem = entity['id']
            if not qitem.startswith('Q'):
                continue
            # OPERATIONS
        except (json.JSONDecodeError, KeyError):
            print('JSON error.')
Advertisement
Add Comment
Please, Sign In to add comment