
Scrape Daily Sangram
By: a guest on
Apr 30th, 2012 | syntax:
Python | size: 1.09 KB | hits: 27 | expires: Never
import codecs
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
def scrape(folder, start, end):
    """Fetch Daily Sangram news pages and save their article text to disk.

    For every news id from ``start`` to ``end`` (both inclusive) the page
    ``news_details.php?news_id=<id>`` is downloaded, the text of every
    element whose class is ``news-details`` is collected (one element per
    line), and a non-empty result is written UTF-8 encoded to
    ``<folder>/<id>.dat``. Ids whose page yields no text are skipped.

    Args:
        folder: Directory that receives the ``.dat`` files. It is not
            created here, so it has to exist already.
        start: First news id to fetch.
        end: Last news id to fetch (inclusive).

    Network and file errors are not caught: the first one aborts the run.

    NOTE(review): the source this was recovered from had lost all of its
    indentation. The nesting below (in particular that the "end" banner is
    printed once per saved document and the "Done" banner once per call)
    is reconstructed from the control flow -- confirm against the original.
    """
    # Single-argument, parenthesised prints produce the same output as the
    # Python 2 print statement this file was written with.
    print('-*-*-*-*-*-*-*-*- Scarping Starts -*-*-*-*-*-*-*-*-*-')
    for news_id in range(start, end + 1):
        url = ('http://www.dailysangram.com/news_details.php?news_id='
               + str(news_id))
        # Same bytes as the former ``print url , ' : '`` (the statement
        # inserted one space between its two arguments).
        print(url + '  : ')

        response = urlopen(url)
        try:
            # Assumes BeautifulSoup reads the whole response while it is
            # being constructed, so closing right after is safe -- TODO
            # confirm for the BeautifulSoup version in use.
            soup = BeautifulSoup(response)
        finally:
            # Release the connection even if parsing fails.
            response.close()
        if not soup:
            continue

        blocks = soup.findAll(True, {'class': 'news-details'})
        doc = ''.join(block.getText() + '\n' for block in blocks)
        if not doc.strip():
            # Nothing usable on this page; do not create an empty file.
            continue
        print(doc)

        target = folder + '/' + str(news_id) + '.dat'
        # ``with`` closes the file even when the write raises; ``write`` is
        # used because ``doc`` is one string, not a sequence of lines.
        with codecs.open(target, 'w', encoding='utf-8') as fp:
            fp.write(doc)
        print('-------------- end -----------------\n\n')
    print('-*-*-*-*-*-*-*-*-*- Done All Scraping -*-*-*-*-*-*-*-*-*-*-*-')
"------------------------------------------------------"
# Script entry: fetch news ids 24700 through 24800 (inclusive) into ./test.
scrape(folder='test', start=24700, end=24800)