Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import glob
import time
from urllib.request import urlopen
import pandas as pd
import xml.etree.ElementTree as ET

# XML namespaces used by the DNB SRU responses. These are loop-invariant,
# so they are defined once here instead of being rebuilt for every URL.
# (Manually extracted from the XML file; could be auto-detected instead.)
NAMESPACES = {
    "zs": "http://www.loc.gov/zing/srw/",
    "": "http://www.loc.gov/MARC21/slim",  # default (unprefixed) namespace
}

# XPath from the SRU response root down to the MARC21 datafield nodes.
DATAFIELD_NODES_PATH = "./zs:records/zs:record/zs:recordData/record/datafield"

# Which datafields to extract: tag 100 / ind1 "1" / ind2 " " is the MARC21
# main-entry personal name (the author). Use [] to process every datafield.
DATAFIELD_ATTRIBUTE_FILTERS = [
    {
        "tag": "100",  # author
        "ind1": "1",
        "ind2": " ",
    },
]


def _matches_filters(datafield_node, attribute_filters):
    """Return True if the node's attributes fully match at least one filter dict.

    An empty ``attribute_filters`` list accepts every node (same semantics as
    the original "decomment to clear filters" behavior).
    """
    if not attribute_filters:
        return True
    return any(
        all(datafield_node.get(k) == v for k, v in attr_dict.items())
        for attr_dict in attribute_filters
    )


def extract_authors(root, attribute_filters=DATAFIELD_ATTRIBUTE_FILTERS):
    """Collect the text of every <subfield code="a"> in matching datafields.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Root element of one parsed SRU response document.
    attribute_filters : list[dict]
        Attribute filters applied to each datafield node; an empty list
        accepts all nodes.

    Returns
    -------
    list[str]
        Subfield 'a' texts (author name / title strings) in document order.
    """
    authors = []
    for datafield_node in root.iterfind(DATAFIELD_NODES_PATH, namespaces=NAMESPACES):
        if not _matches_filters(datafield_node, attribute_filters):
            continue
        for subfield_node in datafield_node.iterfind(
            "./subfield[@code='a']", namespaces=NAMESPACES
        ):
            authors.append(subfield_node.text)
    return authors


def main():
    """Fetch each URL from the first matching CSV and print its author list."""
    # NOTE: recursive=True only has an effect when the pattern contains '**';
    # kept for compatibility with the original call.
    files = glob.glob('./extract/isbnlist/Reihe*_isbn-dnb2.csv', recursive=True)
    print(files)
    # Only the first file is processed (the original used a count flag for
    # this); files[:1] also handles an empty glob result gracefully.
    for file in files[:1]:
        csvfile = pd.read_csv(file, sep='\t', encoding='utf-8')
        for row in csvfile['URL']:
            print('row: ' + row)
            # Stream the XML response straight into the parser; the context
            # manager guarantees the connection is closed.
            with urlopen(str(row)) as response:
                root = ET.parse(response).getroot()
            print(extract_authors(root))


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment