Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import os
- import psycopg2
- import re
- import time
- import psyco
- psyco.full()
- connection = psycopg2.connect("dbname='testing' user='test' host='localhost' password='pass'");
- cursor = connection.cursor()
- file_list = os.listdir("logs")
- def find_date(date):
- cursor.execute("SELECT * FROM days WHERE date = '%s'" % time.strftime("%d-%m-%Y", date))
- return cursor.fetchone()
- def find_hostname(hostname):
- cursor.execute("SELECT * FROM hostnames WHERE hostname = '%s'" % hostname)
- return cursor.fetchone()
- def find_person(name):
- name = name.replace("\\", "\\\\")
- cursor.execute("SELECT * FROM people WHERE name = '%s'" % name)
- return cursor.fetchone()
- def log_hostname(name, person_id):
- hostname = find_hostname(name)
- if hostname == None:
- cursor.execute("INSERT INTO hostnames (hostname) VALUES ('%s')" % name)
- connection.commit()
- hostname = find_hostname(name)
- hostname_id = str(hostname[0])
- cursor.execute("SELECT * FROM hostnames_people WHERE hostname_id = '%s' AND person_id = '%s'" % (hostname_id, person_id))
- join = cursor.fetchone()
- if join == None:
- cursor.execute("INSERT INTO hostnames_people (hostname_id, person_id) VALUES ('%s', '%s')" % (hostname_id, person_id))
- connection.commit()
- return hostname[0]
- def log_date(date):
- d = find_date(date)
- if d == None:
- cursor.execute("INSERT INTO days (date) VALUES ('%s')" % time.strftime("%d-%m-%Y", date))
- connection.commit()
- def log_person(name):
- name = re.sub('<', '', name)
- name = re.sub('>', '', name)
- person = find_person(name)
- if person == None:
- cursor.execute("INSERT INTO people (name) VALUES ('%s')" % name.replace("\\", "\\\\"))
- connection.commit()
- person = find_person(name)
- return str(person[0])
- def time_to_string(time):
- return time.strftime("%d-%m-%Y %H:%M", t)
- def sanitize(string):
- return unicode(" ".join(string).replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'"), "iso-8859-1")
- channel = '#rubyonrails'
- for file in file_list:
- if os.path.isdir(file) == False:
- f = open(os.path.join("logs", file))
- print file
- date = re.sub('ror.log.','', file)
- lines = f.readlines()
- line_count = 0
- time_taken = time.time()
- log_date(time.strptime(date, "%Y%m%d"))
- for line in lines:
- line_count += 1
- if line_count % 100 == 0:
- print "100 lines done in: " + str(time.time() - time_taken)
- time_taken = time.time()
- l = re.sub(r'\n$', '', line).split(" ")
- t = time.strptime(date + " " + re.sub(r'\[|\]','',l[0]), "%Y%m%d %H:%M")
- # when a person does /me
- if l[1] == "Action:":
- person_id = log_person(l[2])
- message = l[3:len(l)]
- cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel, message) VALUES ('%s', 'part', '%s' , '%s', '%s')" % (person_id, time_to_string(time), channel, sanitize(message)))
- # when a person parts..
- elif len(l) >= 5 and (l[4] == channel or l[4] == channel + "." or l[1] == channel or l[4] == "irc:") and re.match(r'<', l[1]) == None:
- log_hostname(l[2], person_id)
- if l[4] == "irc:":
- person_id = log_person(l[1])
- cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel) VALUES ('%s', 'quit', '%s', '%s')" % (person_id, time_to_string(time), channel))
- if l[3] == 'left' and l[4] == channel:
- person_id = log_person(l[1])
- cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel) VALUES ('%s', 'part', '%s' , '%s')" % (person_id, time_to_string(time), channel))
- if l[3] == 'joined':
- person_id = log_person(l[1])
- cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel) VALUES ('%s', 'join', '%s' , '%s')" % (person_id, time_to_string(time), channel))
- elif " ".join(l[2:4]) == "kicked from " + channel:
- person_id = log_person(l[1])
- other_person_id = log_person(l[6])
- cursor.execute("INSERT INTO chats (person_id, other_person_id, message_type, created_at, channel, message) VALUES ('%s','%s', 'kick', '%s' , '%s', '%s')" % (person_id, other_person_id, time_to_string(time), channel, sanitize(t[7:len(t)])))
- elif l[1] == channel + ":" and " ".join(l[2:3]) == "mode change":
- person_id = log_person(l[7].split('!')[0])
- cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel, message) VALUES ('%s', 'mode', '%s' ,'%s', '%s')" % (person_id, time_to_string(time), channel, t[4:5]))
- elif " ".join(l[1:2]) == "Nick change:":
- person_id = log_person(l[3])
- other_person_id = log_person(l[5])
- cursor.execute("INSERT INTO chats (person_id, other_person_id, message_type, created_at, channel) VALUES ('%s','%s', 'nick-change','%s' , '%s')" % (person_id, other_person_id, time_to_string(time), channel))
- else:
- person_id = log_person(l[1])
- cursor.execute("INSERT INTO chats (person_id, message_type, created_at, channel, message) VALUES ('%s', 'message', '%s' , '%s', '%s')" % (person_id, time_to_string(time), channel, sanitize(l[2:len(l)])))
- cursor.execute("UPDATE people SET chats_count = chats_count + 1 WHERE id = '%s'" % person_id)
- connection.commit()
- connection.close()
Add Comment
Please, Sign In to add comment