Advertisement
Guest User

Untitled

a guest
Dec 4th, 2013
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | None | 0 0
  1. from __future__ import division
  2.  
  3. import time
  4. import csv
  5.  
  6. import lxml.html
  7.  
  8. URL = 'http://languagelog.ldc.upenn.edu/nll/?p=8199'
  9.  
  10. start_time = None
  11. times = []
  12.  
  13. tree = lxml.html.parse(URL)
  14. for p in tree.iter('p'):
  15.     if p.get('class') == 'postmeta':
  16.         time_str = p.text.split('\n')[1].strip()
  17.         time_struct = time.strptime(time_str, '%B %d, %Y @ %I:%M %p')
  18.         start_time = time_struct
  19.     if p.get('class') == 'commentmeta':
  20.         date_time = p.text.strip()
  21.         date_time += ' ' + p[0].text
  22.         time_struct = time.strptime(date_time, '%B %d, %Y @ %I:%M %p')
  23.         times.append(time_struct)
  24.  
  25.  
  26. first = time.mktime(start_time)
  27. last = time.mktime(times[-1])
  28.  
  29.  
  30. with open('test.csv', 'w', newline='') as csvfile:
  31.     writer = csv.writer(csvfile, delimiter=',',
  32.                         quotechar='"', quoting=csv.QUOTE_MINIMAL)
  33.     writer.writerow(['Number of Comments'])
  34.     comments = 0
  35.     for second in range(int(first), int(last), 60):
  36.         if time.mktime(times[comments]) <= second:
  37.             comments += 1
  38.         writer.writerow([comments])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement