Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- AAM7676,2012-02-02 11:55:52,32,2012-02-03 19:55:30,62,1
- AAM7676,2012-02-11 13:56:11,32,2012-02-12 21:00:18,52,2
- AAM7676,2012-02-21 16:30:55,32,2012-02-23 13:29:41,62,1
- AAM7676,2012-03-07 20:03:32,32,2012-03-09 13:31:35,62,1
- AAM7676,2012-05-28 06:08:05,32,2012-05-29 15:49:55,52,2
- AAM7676,2012-08-22 12:47:28,32,2012-08-24 08:03:09,52,1
- AAO9229,2012-01-10 07:19:29,32,2012-01-11 16:39:16,52,2
- AAP0678,2012-04-09 16:35:19,32,2012-04-10 19:46:55,52,2
- AAP0678,2012-04-30 16:44:28,32,2012-05-01 19:20:00,52,2
- AAP0678,2012-06-01 19:31:34,32,2012-06-03 10:34:33,52,3
- AAU6100,2012-01-09 17:49:13,32,2012-01-11 02:00:33,52,3
- AAU6100,2012-01-20 21:18:16,32,2012-01-22 14:09:00,52,3
- AAU6100,2012-02-20 13:35:39,32,2012-02-21 19:45:55,52,2
- AAU6100,2012-03-13 09:50:51,32,2012-03-14 22:35:51,52,3
- plate,jan,feb,mar,abr,may,jun,jul,aug,sep.oct,nov,dec,total
- AAM7676,0,3,1,0,1,0,0,1,0,0,0,0,6
- AAO9229,1,0,0,0,0,0,0,0,0,0,0,0,1
- AAP0678,0,0,0,1,1,1,0,0,0,0,0,0,3
- AAU6100,2,1,1,0,0,0,0,0,0,0,0,0,4
- import pandas, numpy as np
- df = pandas.io.parsers.read_csv("baseline.csv")
- df["month"] = df["time2"].map(lambda x: int(x.split('-')[1]))
- df.groupby(["license","month"]).apply(len)
- license month
- AAM7676 2 3
- 3 1
- 5 1
- 8 1
- AAO9229 1 1
- AAP0678 4 1
- 5 1
- 6 1
- AAU6100 1 2
- 2 1
- 3 1
- t = df.groupby(["license","month"]).apply(len)
- t.unstack(level=0).reindex(index=range(1,13), fill_value=0).T.fillna(0)
- 1 2 3 4 5 6 7 8 9 10 11 12
- license
- count AAM7676 0 3 1 0 1 0 0 1 0 0 0 0
- AAO9229 1 0 0 0 0 0 0 0 0 0 0 0
- AAP0678 0 0 0 1 1 1 0 0 0 0 0 0
- AAU6100 2 1 1 0 0 0 0 0 0 0 0 0
- import csv
- import collections
- result = collections.OrderedDict()
- for cols in csv.reader(open('basefile.csv')):
- if len(cols) != 6:
- continue
- plate = cols[0]
- month = int(cols[3][5:7])
- result.setdefault(plate, [plate] + [0]*12)[month] += 1
- print 'plate,jan,feb,mar,abr,may,jun,jul,aug,sep.oct,nov,dec,total'
- for row in result.values():
- print ','.join(map(str, row)) + ',' + str(sum(row[1:]))
- gawk -F, '
- {
- plate[$1]++
- split($4, dt, /-0*/)
- count[$1,dt[2]]++
- }
- END {
- print "plate,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec,total"
- n = asorti(plate, ordered_plates)
- for (i=1; i<=n; i++) {
- p = ordered_plates[i]
- printf("%s,", p)
- for (m=1; m<=12; m++)
- printf("%d,", count[p,m])
- print plate[p]
- }
- }
- ' basefile.csv
- plate,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec,total
- AAM7676,0,3,1,0,1,0,0,1,0,0,0,0,6
- AAO9229,1,0,0,0,0,0,0,0,0,0,0,0,1
- AAP0678,0,0,0,1,1,1,0,0,0,0,0,0,3
- AAU6100,2,1,1,0,0,0,0,0,0,0,0,0,4
- #!/usr/bin/perl
- use strict;
- use warnings;
- use List::Util qw/ sum /;
- my %data;
- while (<DATA>) {
- my ($plate, $col4) = (split /,/)[0, 3];
- my ($month) = $col4 =~ /-(dd)-/;
- $data{$plate}{$month}++;
- }
- print join(",", qw/ plate jan feb mar apr may jun jul aug sep oct nov dec total /), "n";
- for my $plate (sort keys %data) {
- my @per_month = map $data{$plate}{$_} || 0, '01' .. '12';
- print join(",", $plate, @per_month, sum @per_month), "n";
- }
- __DATA__
- AAM7676,2012-02-02 11:55:52,32,2012-02-03 19:55:30,62,1
- AAM7676,2012-02-11 13:56:11,32,2012-02-12 21:00:18,52,2
- AAM7676,2012-02-21 16:30:55,32,2012-02-23 13:29:41,62,1
- AAM7676,2012-03-07 20:03:32,32,2012-03-09 13:31:35,62,1
- AAM7676,2012-05-28 06:08:05,32,2012-05-29 15:49:55,52,2
- AAM7676,2012-08-22 12:47:28,32,2012-08-24 08:03:09,52,1
- AAO9229,2012-01-10 07:19:29,32,2012-01-11 16:39:16,52,2
- AAP0678,2012-04-09 16:35:19,32,2012-04-10 19:46:55,52,2
- AAP0678,2012-04-30 16:44:28,32,2012-05-01 19:20:00,52,2
- AAP0678,2012-06-01 19:31:34,32,2012-06-03 10:34:33,52,3
- AAU6100,2012-01-09 17:49:13,32,2012-01-11 02:00:33,52,3
- AAU6100,2012-01-20 21:18:16,32,2012-01-22 14:09:00,52,3
- AAU6100,2012-02-20 13:35:39,32,2012-02-21 19:45:55,52,2
- AAU6100,2012-03-13 09:50:51,32,2012-03-14 22:35:51,52,3
- from collections import defaultdict
- d = defaultdict(lambda : [None]+[0]*12)
- with open('yourfile') as f:
- for line in f:
- plate,_,_,time,_,_ = line.split(',') #maybe use csv instead
- month = int(time.split('-')[1]) #get the month
- d[plate][month] += 1
- SELECT
- plate,
- COUNT(IF(MONTH(dt2) = 1, 1, NULL)) jan,
- COUNT(IF(MONTH(dt2) = 2, 1, NULL)) feb,
- COUNT(IF(MONTH(dt2) = 3, 1, NULL)) mar,
- COUNT(IF(MONTH(dt2) = 4, 1, NULL)) apr,
- COUNT(IF(MONTH(dt2) = 5, 1, NULL)) may,
- COUNT(*) total
- FROM
- basefile_table
- WHERE
- YEAR(dt2) = 2012
- GROUP BY
- plate;
- +---------+-----+-----+-----+-----+-----+-------+
- | plate | jan | feb | mar | apr | may | total |
- +---------+-----+-----+-----+-----+-----+-------+
- | AAM7676 | 0 | 3 | 1 | 0 | 1 | 6 |
- | AAO9229 | 1 | 0 | 0 | 0 | 0 | 1 |
- | AAP0678 | 0 | 0 | 0 | 1 | 1 | 3 |
- | AAU6100 | 2 | 1 | 1 | 0 | 0 | 4 |
- +---------+-----+-----+-----+-----+-----+-------+
- cut -d, -f1,4 basefile.csv |
- sed 's/,2012-([0-9][0-9])-[0-9][0-9] ..:..:..$/ 1/'
- AAM7676 02
- AAM7676 02
- AAM7676 02
- AAM7676 03
- AAM7676 05
- AAM7676 08
- AAO9229 01
- AAP0678 04
- AAP0678 05
- AAP0678 06
- AAU6100 01
- AAU6100 01
- AAU6100 02
- AAU6100 03
- ... |
- sort | uniq -c
- 3 AAM7676 02
- 1 AAM7676 03
- 1 AAM7676 05
- 1 AAM7676 08
- 1 AAO9229 01
- 1 AAP0678 04
- 1 AAP0678 05
- 1 AAP0678 06
- 2 AAU6100 01
- 1 AAU6100 02
- 1 AAU6100 03
- cut -d, -f1,4 basefile.csv |
- sed 's/,2012-([0-9][0-9])-[0-9][0-9] ..:..:..$/ 1/' |
- sort |
- uniq -c |
- awk '
- { if ($2 != last_plate && last_plate != "")
- {
- printf "%s", last_plate
- for (i = 1; i <= 12; i++)
- {
- printf ",%d", count[i]
- count[i] = 0;
- }
- print ""
- }
- last_plate = $2
- count[$3+0] = $1
- }
- END { if (last_plate != "")
- {
- printf "%s", last_plate
- for (i = 1; i <= 12; i++)
- printf ",%d", count[i]
- print ""
- }
- }'
- AAM7676,0,3,1,0,1,0,0,1,0,0,0,0
- AAO9229,1,0,0,0,0,0,0,0,0,0,0,0
- AAP0678,0,0,0,1,1,1,0,0,0,0,0,0
- AAU6100,2,1,1,0,0,0,0,0,0,0,0,0
- drop table if exists toto;
- create table toto(
- plate VARCHAR(32),
- date1 DATETIME,
- something1 INT(10),
- date2 DATETIME,
- something2 INT(10),
- something3 INT(10)
- );
- INSERT INTO toto VALUES('AAM7676','2012-02-02 11:55:52',32,'2012-02-03 19:55:30',62,1);
- INSERT INTO toto VALUES('AAM7676','2012-02-11 13:56:11',32,'2012-02-12 21:00:18',52,2);
- INSERT INTO toto VALUES('AAM7676','2012-02-21 16:30:55',32,'2012-02-23 13:29:41',62,1);
- INSERT INTO toto VALUES('AAM7676','2012-03-07 20:03:32',32,'2012-03-09 13:31:35',62,1);
- INSERT INTO toto VALUES('AAM7676','2012-05-28 06:08:05',32,'2012-05-29 15:49:55',52,2);
- INSERT INTO toto VALUES('AAM7676','2012-08-22 12:47:28',32,'2012-08-24 08:03:09',52,1);
- INSERT INTO toto VALUES('AAO9229','2012-01-10 07:19:29',32,'2012-01-11 16:39:16',52,2);
- INSERT INTO toto VALUES('AAP0678','2012-04-09 16:35:19',32,'2012-04-10 19:46:55',52,2);
- INSERT INTO toto VALUES('AAP0678','2012-04-30 16:44:28',32,'2012-05-01 19:20:00',52,2);
- INSERT INTO toto VALUES('AAP0678','2012-06-01 19:31:34',32,'2012-06-03 10:34:33',52,3);
- INSERT INTO toto VALUES('AAU6100','2012-01-09 17:49:13',32,'2012-01-11 02:00:33',52,3);
- INSERT INTO toto VALUES('AAU6100','2012-01-20 21:18:16',32,'2012-01-22 14:09:00',52,3);
- INSERT INTO toto VALUES('AAU6100','2012-02-20 13:35:39',32,'2012-02-21 19:45:55',52,2);
- INSERT INTO toto VALUES('AAU6100','2012-03-13 09:50:51',32,'2012-03-14 22:35:51',52,3);
- SELECT
- t.plate,
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=1),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=2),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=3),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=4),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=5),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=6),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=7),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=8),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=9),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=10),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=11),
- (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=12),
- COUNT(*)
- FROM toto t
- GROUP BY plate;
- AAM7676 0 3 1 0 1 0 0 1 0 0 0 0 6
- AAO9229 1 0 0 0 0 0 0 0 0 0 0 0 1
- AAP0678 0 0 0 2 0 1 0 0 0 0 0 0 3
- AAU6100 2 1 1 0 0 0 0 0 0 0 0 0 4
Add Comment
Please, Sign In to add comment