Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import requests
from bs4 import BeautifulSoup

####### FIRST COPY&PASTE FUNCTION DEFINITIONS [OR DOWNLOAD&IMPORT] #######
### alterTextVals <-- from https://pastebin.com/DSXcgQyC
### getElementAttr, selectElement___, htreeToDict <-- from https://pastebin.com/BpjZSQPi
### [htreeToDict is the function we're calling, but it depends on the others]

# fetching and parsing html
# An explicit timeout keeps the script from hanging forever on a stalled
# connection (requests applies no timeout by default).
r = requests.get(
    'https://www.gelbeseiten.de/suche/architekturb%c3%bcros/aachen?umkreis=21000',
    timeout=30,
)
# raise_for_status() returns None on success and raises requests.HTTPError on
# a 4xx/5xx response, so it is called on its own instead of being interpolated
# into a message (which could only ever show "None").
r.raise_for_status()
print(f'Status {r.status_code} {r.reason} to GET', r.url)
soup = BeautifulSoup(r.content, 'html5lib')

# preparing the second input (a dictionary of selectors and attributes)
# NOTE(review): each value looks like a 3-tuple of
#   (CSS selector, what to read from the match, post-processing / 'list' flag);
# the exact semantics live in htreeToDict (see the paste linked above) --
# confirm there before changing any entry.
selRef = {
    'docTitle': ('head title', '', ''),
    'headText': ('head', '', [('elimWhite', '')]),
    'headLinks': ('head *[href]', ('localName', 'href'), 'list'),
    'bodyText': ('body', '', [('elimWhite', ' '), ('truncSplit...', 100)]),
    # Two different selectors on purpose: an id (#...) and a class (....).
    'resultsCt': ('#mod-TrefferlisteInfo', '', ''),
    'resultStr': ('.mod-TrefferlisteInfo', '', ''),
    # Result articles that DO contain a "besteBranche" paragraph; the nested
    # dictionary is a selector map that presumably is applied per article.
    'listings1': (
        'article[id^="treffer_"]:has(p.mod-Treffer--besteBranche)',
        {
            'articleId': ('', 'id', [('replace', ('treffer_', 1))]),
            'Title': ('h2[data-wipe-name="Titel"]', '', ''),
            'Branch': ('p.mod-Treffer--besteBranche', '', ''),
            'Address': ('p[data-wipe-name="Adresse"]', '', ''),
            'Contact': ('p[data-wipe-name="Kontaktdaten"]', '', ''),
            'Website': ('a.contains-icon-homepage[href]', 'href', ''),
            # The pasted output below has no 'Email' key, so keys whose
            # selector matches nothing appear to be omitted -- TODO confirm.
            'Email': ('a.contains-icon-email[href^="mailto:"]', 'href', [
                ('replace', ('mailto:', 1)), ('word1', '?'),
            ]),
            'DetailsPage': ('a.contains-icon-details[href]', 'href', ''),
        },
        'list',
    ),
    # The complementary set: result articles WITHOUT a "besteBranche" paragraph.
    'listings2': (
        'article[id^="treffer_"]:not(:has(p.mod-Treffer--besteBranche))',
        {
            'articleId': ('', 'id', [('replace', ('treffer_', 1))]),
            'Title': ('h2[data-wipe-name="Titel"]', '', ''),
        },
        'list',
    ),
}

## calling the function
# The return value is only echoed automatically in an interactive session
# (REPL / notebook); wrap the call in print() when running this as a script.
htreeToDict(soup, selRef)
- ################# THE OUTPUT (pasted from terminal) #################
- {
- 'docTitle': 'ᐅ Top 10 Architekturbüros Aachen | ✉ Adresse | ☎ Telefonnummer | 📝 Kontakt | ✅ Bewertungen ➤ Jetzt auf GelbeSeiten.de ansehen.',
- 'headText': 'ᐅTop10ArchitekturbürosAachen|✉Adresse|☎Telefonnummer|📝Kontakt|✅Bewertungen➤JetztaufGelbeSeiten.deansehen.',
- 'headLinks': [('base', '/'),
- ('link', '/webgs/css/global_above.css?1668512157093'),
- ('link', '/webgs/css/global_above.css?1668512157093'),
- ('link', 'https://wwa.wipe.de/wwa.js'),
- ('link', 'https://a.delivery.consentmanager.net/delivery/cmp.php?id=15760'),
- ('link', 'https://cdn.consentmanager.net/delivery/js/cmp_de.min.js'),
- ('link', 'https://consentmanager.mgr.consensu.org'),
- ('link', '/webgs/css/trefferliste_above.css?1668512157093'),
- ('link', '/webgs/css/trefferliste_above.css?1668512157093'),
- ('link', '/webgs/css/schaufenster_global.css?1668512157093'),
- ('link', 'https://www.gelbeseiten.de/suche/architekturb%c3%bcros/aachen?umkreis=21000'),
- ('link', '/webgs/fonts/TheSansB4-3_Light.woff2'),
- ('link', '/webgs/fonts/TheSansB4-5_Plain.woff2'),
- ('link', '/webgs/fonts/TheSansB4-7_Bold.woff2'),
- ('link', 'https://data-a0f0ae1310.gelbeseiten.de/sensor.modern.ncl.min.js'),
- ('link', '/webgs/css/global_below.css?1668512157093'),
- ('link', '/webgs/css/global_below.css?1668512157093'),
- ('link', '/webgs/libraries/manifest.json'),
- ('link', 'https://ad13.adfarm1.adition.com'),
- ('link', '/webgs/css/trefferliste_below.css?1668512157093'),
- ('link', '/webgs/css/trefferliste_below.css?1668512157093'),
- ('link', 'https://imagesrv.adition.com')],
- 'bodyText': 'Suchen>Was & Wo SucheBranchenkatalogService>FÜR SIEV...experten.deEin Service IhrerGelbe Seiten Verlage',
- 'resultsCt': '326',
- 'resultStr': '326Treffer\n\tfürArchitekturbürosinAachen',
- 'listings1': [{'articleId': '128049362580',
- 'Title': 'JOCHUM MARTIN Dipl.Ing. Architekt AKNW',
- 'Branch': 'Architektur',
- 'Address': 'Kardinalstr. 7,52070 Aachen922 m',
- 'Contact': '0241 15 87 07',
- 'Website': 'http://www.bauplanungsbuero.net',
- 'DetailsPage': 'https://www.gelbeseiten.de/gsbiz/f52ce153-0e47-410c-b357-fd4c69a15d9c'},
- {'articleId': '128038111883',
- 'Title': 'Architektur Hammers',
- 'Branch': 'Architektur',
- 'Address': 'Melatener Str. 82,52074 Aachen1,5 km',
- 'Contact': '0241 87 79 37',
- 'Website': 'http://www.architektur-hammers.de',
- 'DetailsPage': 'https://www.gelbeseiten.de/gsbiz/fa71f80d-db96-4279-b97c-46afbd7bd356'}],
- 'listings2': [
- {'articleId': '128046113791', 'Title': 'Hautmann + Metz Architekten Partnerschaft mbB'},
- {'articleId': '128026414465', 'Title': 'Werrens M. Dipl.-Ing.'},
- {'articleId': '128026414714', 'Title': 'Leidinger U. Dipl.-Ing.'},
- {'articleId': '128055874705', 'Title': 'Ortmanns Edith'},
- {'articleId': '128026416297', 'Title': 'Ince H. Dipl.-Ing.'},
- {'articleId': '128027601198', 'Title': 'g.u.t. Architekten Gerhards u. Thomé'},
- {'articleId': '128026423021', 'Title': 'Barhainski HorstHorst Architekt'},
- {'articleId': '128027879691', 'Title': 'Daheim + Uppenkamp Architektur-Ing.-Büro Architekten'},
- {'articleId': '128026421674', 'Title': 'gmp Generalplanungsgesellschaft mbH Architekturbüro'},
- {'articleId': '128026418543', 'Title': 'Aymen S.'},
- {'articleId': '128036948668', 'Title': 'Ritter Karl-Heinz Architekt'},
- {'articleId': '128026418055', 'Title': 'Rennecke G. Dipl.-Ing.'},
- {'articleId': '128027945884', 'Title': 'IParch GmbH'},
- {'articleId': '9190128871613690', 'Title': 'Michael Golombek GAP Architekturbüro und Projektsteuerung'},
- {'articleId': '128026417128', 'Title': 'Klinge Manfred Architekturbüro'},
- {'articleId': '128026420536', 'Title': 'Stollmann P.P.H. Dipl.-Ing.'},
- {'articleId': '128026419763', 'Title': 'Kasaci Okyay Architekt'},
- {'articleId': '128047603432', 'Title': 'pfeiffer.volland architekten'},
- {'articleId': '9190128866480780', 'Title': 'Dipl.-Ing. D. Körkel-Greuel und Dipl.-Ing. J. Greuel Architekturbüro'},
- {'articleId': '128026420850', 'Title': 'Glashaus Architekturbüro'},
- {'articleId': '9190128869930960', 'Title': 'Dipl.-Ing. Jan Konwinski Architekturbüro'},
- {'articleId': '128026416136', 'Title': 'Spielmann M.'},
- {'articleId': '128026416418', 'Title': 'ecf architekten gmbh'},
- {'articleId': '128026412934', 'Title': 'Invention Industriedesign und Entwicklungen'},
- {'articleId': '128030495940', 'Title': 'Heuer, Faust Dipl.-Ing.'},
- {'articleId': '128026417904', 'Title': 'Kadawittfeldarchitektur'},
- {'articleId': '128048126846', 'Title': 'Klösgen H.'},
- {'articleId': '128026414591', 'Title': 'Karsten u. Partner'},
- {'articleId': '128044241455', 'Title': 'SW-Häuser GmbH Architekt'},
- {'articleId': '128036891386', 'Title': 'Rudolph W. Dipl.-Ing.'},
- {'articleId': '128030377819', 'Title': 'Bernardi K.'},
- {'articleId': '128026421328', 'Title': 'Kessler G. Dipl.-Ing.'},
- {'articleId': '128026422117', 'Title': 'Borgmann Architekten und Ingenieure GmbH'},
- {'articleId': '128050235257', 'Title': 'M Bauträger und Projektentwicklung UG (haftungsbeschränkt)'},
- {'articleId': '128026992787', 'Title': 'OX2architekten, Orawiec I.-M., M. Prof.'},
- {'articleId': '128026415299', 'Title': 'Frey A. Dipl.-Ing.'},
- {'articleId': '128050790865', 'Title': 'Hermsen Robert Dipl.-Ing. Architekt'},
- {'articleId': '128026415216', 'Title': 'Morauke A.'},
- {'articleId': '128050804669', 'Title': 'Schommers H. Dipl.-Ing.'},
- {'articleId': '128055904022', 'Title': 'Schommers Hans-Joachim Dipl.-Ing. Architekt Innenarchitekt'},
- {'articleId': '128026421171', 'Title': 'Üstündag H. Dipl.-Ing.'},
- {'articleId': '128026414775', 'Title': 'Hirsch Eberhard Dipl.-Ing. Architekt'},
- {'articleId': '128037170468', 'Title': 'Greuel D.'},
- {'articleId': '128026412827', 'Title': 'Fuss U.'},
- {'articleId': '128026422187', 'Title': 'Hansen P.'},
- {'articleId': '128055287150', 'Title': 'Hansen Peter Architekt'},
- {'articleId': '128026422039', 'Title': 'Völlings-Grube D. Dipl.-Ing.'},
- {'articleId': '9190128864101650', 'Title': 'Wolfgang Kleicker Architekturbüro'}
- ]
- }
Advertisement
Add Comment
Please, Sign In to add comment