Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from csv import writer
- from os import listdir
- from os.path import isfile
- from sys import argv
- import re
# Matches file names ending in ``.htm`` or ``.html``.  The pattern is anchored
# at the end so that names such as ``report.html.bak`` are rejected even when
# the pattern is applied with ``match()`` (which only anchors at the start).
HTML_FILENAME_REGEX = re.compile(r'.*\.html?\Z')

# Row labels looked up in each report; they also form the CSV header row.
TITLES = [
    'Имя компьютера',       # computer name
    'Тип ЦП',               # CPU type
    'Системная плата',      # motherboard
    'Дисковый накопитель',  # disk drive
    'Тип ядра ОС',          # OS kernel type
]

# (title, compiled pattern) pairs, in the same order as TITLES.  Each pattern
# matches a single line of the form ``<TR>...<TD>{title} <TD>value`` and
# captures ``value`` up to the end of that line, dropping an optional
# surrounding ``<A ...>`` / ``</A>`` pair.
TITLES_REGEXPS = [
    (
        title,
        re.compile(
            # Escape the title so that it is always matched literally, even
            # if a label containing regex metacharacters is added later.
            f'<TR>.*<TD>{re.escape(title)} <TD>'
            r'(?:<A.*?>)?(?P<value>.*?)(?:</A>)?$',
            re.MULTILINE,
        ),
    )
    for title in TITLES
]
def parse(filename):
    """Extract one CSV row of field values from a single HTML report.

    The file is read as windows-1251 text and searched with every pattern in
    ``TITLES_REGEXPS``.

    Returns a list of strings, one per title and in title order.  When a
    title matches several times, the captured values are joined with
    newlines; a title that never matches yields an empty string.
    """
    with open(filename, encoding='windows-1251') as report:
        html = report.read()

    cells = []
    for title, pattern in TITLES_REGEXPS:
        found = (hit.group('value') for hit in pattern.finditer(html))
        if title == 'Имя компьютера':
            # Only the first occurrence of the computer name is kept.
            first = next(found, None)
            kept = [] if first is None else [first]
        elif title == 'Дисковый накопитель':
            # Disk entries mentioning 'USB Device' are left out of the row.
            kept = [text for text in found if 'USB Device' not in text]
        else:
            kept = list(found)
        cells.append('\n'.join(kept))
    return cells
def main(files):
    """Write ``report.csv`` summarising the given HTML report files.

    Args:
        files: Paths of the reports to parse.  When empty, every regular
            file in the current directory whose whole name matches
            ``HTML_FILENAME_REGEX`` is used instead, in sorted order so that
            the report is reproducible between runs.

    The output is written to ``report.csv`` in the current directory using
    the windows-1251 encoding: a header row of ``TITLES`` followed by one row
    per input file.
    """
    if not files:
        files = sorted(
            filename for filename in listdir('.')
            # fullmatch() rejects names that merely start with "<x>.htm",
            # e.g. "report.html.bak".
            if HTML_FILENAME_REGEX.fullmatch(filename) and isfile(filename)
        )
    # newline='' is required by the csv module: without it, platforms that
    # translate '\n' on write (Windows) produce '\r\r\n' row terminators and
    # alter the newlines embedded in multi-value cells.
    with open(
        'report.csv', 'w', encoding='windows-1251', newline=''
    ) as output_file:
        report = writer(output_file)
        report.writerow(TITLES)
        for filename in files:
            report.writerow(parse(filename))
# Script entry point: everything after the program name on the command line
# is passed to main() as the list of files to process.
if __name__ == '__main__':
    cli_files = argv[1:]
    main(cli_files)
Add Comment
Please, Sign In to add comment