Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""Dump the radioscanner.ru frequency base into ``radioscanner_base.csv``.

The script walks every page of the frequency list, pulls the cells of each
table row apart with plain string splitting and regular expressions, fetches
the separate description page for rows that link to one, and appends one CSV
line per row.

It was written for Python 2 (``urllib.urlopen``); the small shims below let
the same code also run on Python 3.
"""
import re
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

BASE_URL = 'http://www.radioscanner.ru/base/index.php'
LIST_URL = BASE_URL + '?action=list&sortBy=0&page=%i'
DESCRIPTION_URL = BASE_URL + '?action=freqdescr&freq_id=%s'
OUTPUT_PATH = 'radioscanner_base.csv'
CSV_HEADER = "'ID','Frequency','Active/Inactive','Place','Date','Modulation','Signal type','Radiocommunication service','Ownership','Callsign','Views','Acks ','Comments','Description'\n"

# Markers that delimit the data rows inside a list page.
ROW_START = '<tr valign="top" class="tbCel1">'
TABLE_FOOTER = '<table class=forums><tr><td align="right"><small></small></td></tr></table>'

# Cell markers used to cut individual fields out of a row.
PLACE_CELL = '<td class=caption1 style="padding-left:6;padding-right:5;">'
PLACE_DATE_SEPARATOR = '<br> <small style="color:#999999;">'
MODULATION_CELL = '<td class=caption1 style="padding-left:5"><strong>'
SERVICE_CELL = '<td class=caption1 style="padding-left:6;padding-right:5;"><small>'
OWNER_CELL = '<td class=caption1 style="padding-left:5">'
CALLSIGN_CELL = 'td class=caption1 style="padding-left:5"><small>'
DESCRIPTION_CELL = '<td class=caption5 width="30%">'
INACTIVE_MARKER = "<span style='color:D40202;'>"
DESCRIPTION_LINK_MARKER = 'javascript:PopUp'

REGEX_PAGE = re.compile(r'index.php\?action=list&sortBy=0&page=(\d+)')
REGEX_ID = re.compile(r'/base/freq(\d+)\.html')
REGEX_FREQ = re.compile(r'(\d+\.\d+)(</span>)?</a>')
REGEX_VIEWS = re.compile(r'<td class=caption1 style="padding-left:5;color:gray">(\d+)</td>')
REGEX_ACKS = re.compile(r'<td valign="middle" class=caption1><div align="center"><font color="#666666">(\d+)</font></div></td>')
REGEX_COMMENTS = re.compile(r'<td valign="middle" class=caption1><div align="center">(\d+)</div></td>')

# Markup -> plain text.  Apostrophe entities become a double quote (as in the
# original script) so they cannot terminate a single-quoted CSV field.
# NOTE(review): the pasted source had these entities already decoded (no-op
# replaces plus a syntax error), so the exact apostrophe spelling is a
# reconstruction; the common spellings are all accepted.
_REPLACEMENTS = (
    ('<br>', '\n'),
    ('&amp;', '&'),
    ('&lt;', '<'),
    ('&gt;', '>'),
    ('&quot;', '"'),
    ('&#039;', '"'),
    ('&#39;', '"'),
    ('&apos;', '"'),
)
_REPLACEMENT_MAP = dict(_REPLACEMENTS)
_REPLACEMENT_RE = re.compile('|'.join(re.escape(token) for token, _ in _REPLACEMENTS))

# Columns that are written wrapped in single quotes, in output order.
_QUOTED_COLUMNS = (
    'active', 'place', 'time', 'modulation', 'type', 'service', 'own',
    'callsign', 'views', 'acks', 'comments',
)


def unescape_cell(text):
    """Return *text* with ``<br>`` and the basic HTML entities decoded.

    The substitution is done in one pass, so text produced by one replacement
    is never decoded a second time (``&amp;lt;`` becomes ``&lt;``, not ``<``).
    """
    return _REPLACEMENT_RE.sub(lambda match: _REPLACEMENT_MAP[match.group(0)], text)


def quote_field(value):
    """Wrap *value* in single quotes, doubling any embedded single quote."""
    return "'" + value.replace("'", "''") + "'"


def between(text, start, end, index=1):
    """Return what follows the *index*-th *start* marker, up to the next *end*.

    Raises IndexError when *text* contains fewer than *index* start markers.
    """
    return text.split(start)[index].split(end)[0]


def first_group(pattern, text, default='0'):
    """Return group 1 of the first *pattern* match in *text*, else *default*."""
    match = pattern.search(text)
    return match.group(1) if match else default


def last_page_number(page_html):
    """Return the highest page index linked from *page_html* (0 if none)."""
    numbers = [int(number) for number in REGEX_PAGE.findall(page_html)]
    return max(numbers) if numbers else 0


def split_rows(page_html):
    """Return the HTML of each data row of a list page.

    Everything before the first row marker and from the footer table onwards
    is dropped; an empty list is returned when either marker is missing.
    """
    body = ''.join(page_html.split(ROW_START)[1:])
    body = ''.join(body.split(TABLE_FOOTER)[:-1])
    body = body.replace('\r', '').replace('\n\n', '\n')
    return body.split('</tr>')[:-1]


def parse_row(row_html):
    """Extract the fields of one list row into a dict of strings.

    Views, acks and comments default to ``'0'`` when their cell is absent.
    Raises AttributeError or IndexError when a mandatory cell is missing.
    """
    place_and_time = between(row_html, PLACE_CELL, '</small></td>')
    place_parts = place_and_time.split(PLACE_DATE_SEPARATOR)
    modulation_parts = between(row_html, MODULATION_CELL, '</td>').split('</strong><br>')
    return {
        'id': REGEX_ID.search(row_html).group(1),
        'freq': REGEX_FREQ.search(row_html).group(1),
        'active': 'inactive' if INACTIVE_MARKER in row_html else 'active',
        'place': place_parts[0],
        'time': place_parts[1],
        'modulation': modulation_parts[0],
        'type': modulation_parts[1],
        'service': unescape_cell(between(row_html, SERVICE_CELL, '</small></td>')),
        # OWNER_CELL is a prefix of the modulation and callsign markers too,
        # so the ownership cell is the second occurrence in the row.
        'own': unescape_cell(between(row_html, OWNER_CELL, '</td>', index=2)),
        'callsign': unescape_cell(between(row_html, CALLSIGN_CELL, '</small></td>')),
        'views': first_group(REGEX_VIEWS, row_html),
        'acks': first_group(REGEX_ACKS, row_html),
        'comments': first_group(REGEX_COMMENTS, row_html),
    }


def format_csv_line(record, description):
    """Return the CSV line (with trailing newline) for *record*.

    ID and frequency are written bare; every other column is single-quoted.
    """
    fields = [record['id'], record['freq']]
    fields.extend(quote_field(record[column]) for column in _QUOTED_COLUMNS)
    fields.append(quote_field(description))
    return ','.join(fields) + '\n'


def fetch(url):
    """Download *url* and return its body as a native string."""
    response = urlopen(url)
    try:
        body = response.read()
        if not isinstance(body, str):
            # Python 3 hands back bytes.  Falls back to UTF-8 when the server
            # declares no charset -- TODO confirm the site's real encoding.
            charset = response.headers.get_content_charset() or 'utf-8'
            body = body.decode(charset, 'replace')
    finally:
        response.close()
    return body


def fetch_description(record_id):
    """Download and return the description text of the record *record_id*."""
    page_html = fetch(DESCRIPTION_URL % record_id)
    return unescape_cell(between(page_html, DESCRIPTION_CELL, '</td></tr>'))


def main():
    """Scrape every list page and write the rows to ``OUTPUT_PATH``."""
    last_page = last_page_number(fetch(LIST_URL % 0))
    with open(OUTPUT_PATH, 'w') as logfile:
        logfile.write(CSV_HEADER)
        print('Pages: %i' % (last_page + 1))
        for page_num in range(last_page + 1):
            print('Page #%i' % page_num)
            for row_html in split_rows(fetch(LIST_URL % page_num)):
                record = parse_row(row_html)
                if DESCRIPTION_LINK_MARKER in row_html:
                    description = fetch_description(record['id'])
                else:
                    description = ''
                logfile.write(format_csv_line(record, description))
                # Flushed per row as in the original; presumably so partial
                # results survive an interrupted run.
                logfile.flush()


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement