View difference between Paste ID: <a href="/34ryb1AS">34ryb1AS</a> and <a href="/YwxHeKDT">YwxHeKDT</a>

## for https://stackoverflow.com/q/75828625/6146136
1		## for https://stackoverflow.com/q/75828625/6146136
2
3		# !!pip install --upgrade --pre pandas==1.5.0 ## this solution requires pandas 1.5.0
4		import pandas as pd
5
6	-	dfList, pg_num, max_pg = [], 0, None
6	+
7		dfList, pg_num, max_pg = [], 0, 3
8		base_url = 'https://s3platform.jrc.ec.europa.eu/digital-innovation-hubs-tool'
9		while (pg_num:=pg_num+1) and (not isinstance(max_pg,int) or pg_num<max_pg):
10	-	# try: dfList += pd.read_html(pg_url, extract_links='all')[:1] ## [needs v1.5.0.]
10	+
11	-	try: dfList += pd.read_html(pg_url)[:1]
11	+	try: dfList += pd.read_html(pg_url, extract_links='all')[:1]
12		except Exception as e: pg_num, _ = -1, print(f'\n{e!r} from {pg_url}')
13	-	else: print('', end=f'\rScraped {len(dfList[-1])} rows from {pg_url}')
13	+	else: print('', end=f'\rScraped {len(dfList[-1])} rows from {pg_url}')
14
15	-	# pd.concat(dfList).to_csv(output_fp, index=False) ## save without page numbers
15	+	# df = pd.concat(dfList) ## if you don't want to add a column for page numbers
16	-	df=pd.concat(dfList, keys=list(range(1,len(dfList)+1)),names=['from_pg','pgi'])
16	+	df = pd.concat(
17	-	df.reset_index().drop('pgi',axis='columns').to_csv(output_fp, index=False)
17	+	dfList, keys=list(range(1,len(dfList)+1)), names=['from_pg','pgi']
18		).reset_index().drop('pgi',axis='columns')
19
20		## clean up - separate links from text
21		orig_cols = [c for c in df.columns if c != 'from_pg']
22		for ocn in orig_cols:
23		if any(vals:=[cv for cv,*_ in df[ocn]]): df[ocn[0]] = vals
24		if any(links:=[c[1] for c in df[ocn]]): df[ocn[0].split()[0]+' Links'] = links
25		if 'Email Links' in df.columns:
26		df['Email'] = df['Email Links'].str.replace('mailto:', '', 1)
27		df = df.drop('Email Links', axis='columns')
28		df = df.drop(orig_cols, axis='columns')
29
30		## save csv file
31		df.to_csv(output_fp, index=False)