Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import collections
import datetime
import time
import unicodedata

import lxml.html
import numpy
import pylab
import requests
# Scrape the NCSU campus police daily blotter for stolen-bicycle reports and
# plot (1) the time-of-day distribution and (2) the number of reports per
# location.

# Base URL of the blotter page; the report date is appended as MM%2FDD%2FYY.
BLOTTER_URL = 'http://www.safety.ncsu.edu/newblotter.asp?NOTDTE='
# Text the page contains when a day has no blotter entries.
NO_RECORDS_MARKER = 'I am sorry'
# A narrative is counted when it contains SEEN_WITH and at least one of
# INTEREST_WORDS (plain, case-sensitive substring tests).
INTEREST_WORDS = ['bicycle']
SEEN_WITH = 'stolen'
# Inclusive range of blotter dates to scan.
FIRST_DAY = datetime.date(2012, 2, 1)
LAST_DAY = datetime.date(2013, 9, 30)
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30


def report_dates(first_day, last_day):
    """Yield every calendar date from first_day through last_day, inclusive.

    Using real date arithmetic avoids hand-written month-length tables, so
    leap days, 31-day months and year boundaries are all handled correctly.
    """
    for offset in range((last_day - first_day).days + 1):
        yield first_day + datetime.timedelta(days=offset)


def blotter_url(day):
    """Return the blotter URL for the given date (MM%2FDD%2FYY query value)."""
    return '{0}{1:02d}%2F{2:02d}%2F{3:02d}'.format(
        BLOTTER_URL, day.month, day.day, day.year % 100)


def to_ascii(text):
    """Return text as a plain-ASCII text string.

    Compatibility characters (e.g. non-breaking spaces) are decomposed with
    NFKD first; anything still outside ASCII is dropped. The result is text,
    not bytes, so string methods behave the same on Python 2 and Python 3.
    """
    if isinstance(text, bytes):
        text = text.decode('ascii', 'ignore')
    normalized = unicodedata.normalize('NFKD', text)
    return normalized.encode('ascii', 'ignore').decode('ascii')


def decimal_hour(clock_text):
    """Convert a 12-hour clock string such as '9:30 PM' to a fractional hour.

    Returns a float in [0, 24), or None when clock_text is not in
    '%I:%M %p' form. Both one- and two-digit hours are accepted.
    """
    try:
        parsed = time.strptime(clock_text, '%I:%M %p')
    except ValueError:
        return None
    return parsed.tm_hour + parsed.tm_min / 60.


def is_bike_theft(narrative):
    """Return True when narrative mentions SEEN_WITH and any interest word."""
    if SEEN_WITH not in narrative:
        return False
    return any(word in narrative for word in INTEREST_WORDS)


def thefts_in_page(root):
    """Extract bike-theft entries from a parsed blotter page.

    The last <table> on the page is read, skipping its header row. Cell 1 is
    taken as the time, cell 4 as the location and cell 5 as the narrative.
    Returns a list of (hour, location) tuples; hour is None when the time
    cell could not be parsed.
    """
    tables = root.cssselect('table')
    if not tables:
        return []
    entries = []
    for row in tables[-1].cssselect('tr')[1:]:
        cells = row.cssselect('td')
        if len(cells) < 6:
            # Spacer or malformed row: not a blotter entry.
            continue
        if not is_bike_theft(cells[5].text_content().strip()):
            continue
        # Strip after normalizing so a non-breaking space turned into a
        # regular space cannot break the time parse.
        clock_text = to_ascii(cells[1].text_content()).strip()
        location = to_ascii(cells[4].text_content()).strip()
        entries.append((decimal_hour(clock_text), location))
    return entries


def collect_thefts(first_day, last_day):
    """Download every daily blotter in the range and gather bike thefts.

    Returns (hours, locations): hours holds the fractional hour of each
    report whose time could be parsed; locations holds the location of
    every matching report.
    """
    hours = []
    locations = []
    for day in report_dates(first_day, last_day):
        response = requests.get(blotter_url(day), timeout=REQUEST_TIMEOUT)
        if NO_RECORDS_MARKER in response.text:
            print('NO RECORDS')
            continue
        root = lxml.html.fromstring(response.text)
        for hour, location in thefts_in_page(root):
            if hour is not None:
                hours.append(hour)
            locations.append(location)
    return hours, locations


def location_counts(locations):
    """Return (location, count) pairs sorted by ascending count, then name."""
    tally = collections.Counter(locations)
    return sorted(tally.items(), key=lambda pair: pair[::-1])


def plot_thefts(hours, locations, first_day, last_day):
    """Draw the time-of-day histogram and the per-location bar chart."""
    pylab.figure(1)
    if hours:
        # 'density' is the current name of the former 'normed' option.
        pylab.hist(hours, bins=24, range=(0, 24), density=True,
                   histtype='stepfilled')
    pylab.xlim(0, 24)
    pylab.xticks(numpy.arange(0, 24, 4))
    pylab.title(
        'Incidences of Bike Theft from {0:%m/%d/%y} to {1:%m/%d/%y}'.format(
            first_day, last_day))

    pylab.figure(2)
    ranked = location_counts(locations)
    if ranked:
        names, counts = zip(*ranked)
        positions = numpy.arange(len(names))
        pylab.barh(positions, counts, align='center')
        pylab.yticks(positions, names)

    pylab.show()


def main():
    """Scrape the configured date range and show both figures."""
    hours, locations = collect_thefts(FIRST_DAY, LAST_DAY)
    plot_thefts(hours, locations, FIRST_DAY, LAST_DAY)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement