Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Creating an email database and counting email frequency\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import sqlite3\n",
- "import os"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'/home/zeski/Documents/Data_Science/SQL/SQL_DBS'"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Folder holding the SQLite files. Set EMAIL_DB_DIR to point somewhere else;\n",
- "# when it is unset the original location below is used.\n",
- "DB_DIR = os.environ.get('EMAIL_DB_DIR', '/home/zeski/Documents/Data_Science/SQL/SQL_DBS')\n",
- "os.chdir(DB_DIR)\n",
- "\n",
- "os.getcwd()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['email.db', 'FIRSTDB.db', 'sql1.db.sqbpro']"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Open email.db in the working directory (sqlite3 creates it if missing)\n",
- "# and take a cursor for the queries in the cells below.\n",
- "conn = sqlite3.connect('email.db')\n",
- "c = conn.cursor()\n",
- "\n",
- "# Show the folder contents as the cell output.\n",
- "os.listdir()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Create the tally table on first use. Existing rows are left in place, so\n",
- "# running this cell again adds to the stored counts instead of resetting them.\n",
- "c.execute('''\n",
- "    CREATE TABLE IF NOT EXISTS Counts (email TEXT, count INTEGER)\n",
- "''')\n",
- "\n",
- "fname = input('Enter file name: ')\n",
- "if not fname:\n",
- "    fname = 'mbox.txt'  # default file name when the prompt is left empty\n",
- "\n",
- "# The with-block closes the file even if a query raises part-way through.\n",
- "with open(fname) as fh:\n",
- "    for line in fh:\n",
- "        if not line.startswith('From: '):\n",
- "            continue\n",
- "        pieces = line.split()\n",
- "        if len(pieces) < 2:\n",
- "            continue  # header with no address after it; nothing to count\n",
- "        email = pieces[1]\n",
- "        c.execute('''\n",
- "            SELECT count FROM Counts WHERE email = ? \n",
- "        ''', (email,))\n",
- "        row = c.fetchone()\n",
- "        if row is None:\n",
- "            # First sighting of this address: start its tally at 1.\n",
- "            c.execute('''\n",
- "                INSERT INTO Counts (email, count)\n",
- "                VALUES(?, 1)\n",
- "            ''', (email,))\n",
- "        else:\n",
- "            c.execute('''\n",
- "                UPDATE Counts SET count = count +1 WHERE email = ?\n",
- "            ''', (email,))\n",
- "\n",
- "# One commit for the whole file rather than one per matching line.\n",
- "conn.commit()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Ten addresses with the highest tallies, largest first.\n",
- "sql = (\n",
- "    'SELECT email, count FROM Counts '\n",
- "    'ORDER BY count DESC LIMIT 10'\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "zqian@umich.edu 195\n",
- "mmmay@indiana.edu 161\n",
- "cwen@iupui.edu 158\n",
- "chmaurer@iupui.edu 111\n",
- "aaronz@vt.edu 110\n",
- "ian@caret.cam.ac.uk 96\n",
- "jimeng@umich.edu 93\n",
- "rjlowe@iupui.edu 90\n",
- "dlhaines@umich.edu 84\n",
- "david.horwitz@uct.ac.za 67\n"
- ]
- }
- ],
- "source": [
- "# Each result row is an (email, count) pair; print one pair per line.\n",
- "for address, tally in c.execute(sql):\n",
- "    print(str(address), tally)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Release the cursor first, then the connection it belongs to.\n",
- "c.close()\n",
- "conn.close()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment