Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import division
- import time
- import csv
- import lxml.html
# Scrape a Language Log post and write, for every minute between the post's
# publication and its latest comment, the cumulative number of comments made
# so far to a one-column CSV file.

URL = 'http://languagelog.ldc.upenn.edu/nll/?p=8199'

# Timestamp layout used by both the post and the comment metadata,
# e.g. "November 12, 2013 @ 3:45 pm".
TIME_FORMAT = '%B %d, %Y @ %I:%M %p'
OUTPUT_PATH = 'test.csv'
STEP_SECONDS = 60


def parse_timestamp(text):
    """Return *text* (in ``TIME_FORMAT``) as local-time seconds since the epoch.

    Raises:
        ValueError: if *text* does not match ``TIME_FORMAT``.
    """
    return time.mktime(time.strptime(text, TIME_FORMAT))


def scrape_times(url=URL):
    """Fetch *url* and extract the post time and all comment times.

    Returns:
        A ``(post_time, comment_times)`` tuple of epoch seconds.  ``post_time``
        is ``None`` when no ``<p class="postmeta">`` element is found; if
        several are present the last one wins.  ``comment_times`` keeps
        document order.
    """
    post_time = None
    comment_times = []
    tree = lxml.html.parse(url)
    for p in tree.iter('p'):
        css_class = p.get('class')
        if css_class == 'postmeta':
            # The timestamp sits on the second line of the paragraph text.
            post_time = parse_timestamp(p.text.split('\n')[1].strip())
        elif css_class == 'commentmeta':
            # The date is the paragraph's own text; the clock time is the
            # text of its first child element.
            comment_times.append(
                parse_timestamp(p.text.strip() + ' ' + p[0].text))
    return post_time, comment_times


def cumulative_counts(first, last, comment_times, step=STEP_SECONDS):
    """Return the running comment count sampled every *step* seconds.

    Samples are taken at ``first, first + step, ...`` up to and including
    ``last``.  Each sample is the number of entries in *comment_times* that
    are less than or equal to the sample instant.

    Args:
        first: start of the sampling range, in epoch seconds.
        last: end of the sampling range (inclusive), in epoch seconds.
        comment_times: comment timestamps in epoch seconds, in any order.
        step: sampling interval in seconds.

    Returns:
        A list of non-decreasing integer counts, one per sample.
    """
    ordered = sorted(comment_times)
    counts = []
    seen = 0
    # ``+ 1`` makes the end inclusive so the final comment is counted.
    for second in range(int(first), int(last) + 1, step):
        # ``while`` rather than ``if``: several comments can share one
        # minute, and all of them must be counted at that sample.
        while seen < len(ordered) and ordered[seen] <= second:
            seen += 1
        counts.append(seen)
    return counts


def write_counts(counts, path=OUTPUT_PATH):
    """Write *counts* to *path* as a CSV with a 'Number of Comments' header."""
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',',
                            quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Number of Comments'])
        writer.writerows([count] for count in counts)


def main():
    """Scrape ``URL`` and write the per-minute cumulative counts to disk."""
    post_time, comment_times = scrape_times(URL)
    if post_time is None:
        raise SystemExit('No post timestamp (p.postmeta) found at ' + URL)
    if not comment_times:
        raise SystemExit('No comment timestamps (p.commentmeta) found at '
                         + URL)
    write_counts(
        cumulative_counts(post_time, max(comment_times), comment_times))


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement