Guest User

Untitled

a guest
Jan 19th, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.99 KB | None | 0 0
  1. AAM7676,2012-02-02 11:55:52,32,2012-02-03 19:55:30,62,1
  2. AAM7676,2012-02-11 13:56:11,32,2012-02-12 21:00:18,52,2
  3. AAM7676,2012-02-21 16:30:55,32,2012-02-23 13:29:41,62,1
  4. AAM7676,2012-03-07 20:03:32,32,2012-03-09 13:31:35,62,1
  5. AAM7676,2012-05-28 06:08:05,32,2012-05-29 15:49:55,52,2
  6. AAM7676,2012-08-22 12:47:28,32,2012-08-24 08:03:09,52,1
  7. AAO9229,2012-01-10 07:19:29,32,2012-01-11 16:39:16,52,2
  8. AAP0678,2012-04-09 16:35:19,32,2012-04-10 19:46:55,52,2
  9. AAP0678,2012-04-30 16:44:28,32,2012-05-01 19:20:00,52,2
  10. AAP0678,2012-06-01 19:31:34,32,2012-06-03 10:34:33,52,3
  11. AAU6100,2012-01-09 17:49:13,32,2012-01-11 02:00:33,52,3
  12. AAU6100,2012-01-20 21:18:16,32,2012-01-22 14:09:00,52,3
  13. AAU6100,2012-02-20 13:35:39,32,2012-02-21 19:45:55,52,2
  14. AAU6100,2012-03-13 09:50:51,32,2012-03-14 22:35:51,52,3
  15.  
  16. plate,jan,feb,mar,abr,may,jun,jul,aug,sep.oct,nov,dec,total
  17. AAM7676,0,3,1,0,1,0,0,1,0,0,0,0,6
  18. AAO9229,1,0,0,0,0,0,0,0,0,0,0,0,1
  19. AAP0678,0,0,0,1,1,1,0,0,0,0,0,0,3
  20. AAU6100,2,1,1,0,0,0,0,0,0,0,0,0,4
  21.  
  22. import pandas, numpy as np
  23. df = pandas.io.parsers.read_csv("baseline.csv")
  24. df["month"] = df["time2"].map(lambda x: int(x.split('-')[1]))
  25. df.groupby(["license","month"]).apply(len)
  26.  
  27. license month
  28. AAM7676 2 3
  29. 3 1
  30. 5 1
  31. 8 1
  32. AAO9229 1 1
  33. AAP0678 4 1
  34. 5 1
  35. 6 1
  36. AAU6100 1 2
  37. 2 1
  38. 3 1
  39.  
  40. t = df.groupby(["license","month"]).apply(len)
  41. t.unstack(level=0).reindex(index=range(1,13), fill_value=0).T.fillna(0)
  42.  
  43. 1 2 3 4 5 6 7 8 9 10 11 12
  44. license
  45. count AAM7676 0 3 1 0 1 0 0 1 0 0 0 0
  46. AAO9229 1 0 0 0 0 0 0 0 0 0 0 0
  47. AAP0678 0 0 0 1 1 1 0 0 0 0 0 0
  48. AAU6100 2 1 1 0 0 0 0 0 0 0 0 0
  49.  
  50. import csv
  51. import collections
  52.  
  53. result = collections.OrderedDict()
  54. for cols in csv.reader(open('basefile.csv')):
  55. if len(cols) != 6:
  56. continue
  57. plate = cols[0]
  58. month = int(cols[3][5:7])
  59. result.setdefault(plate, [plate] + [0]*12)[month] += 1
  60.  
  61. print 'plate,jan,feb,mar,abr,may,jun,jul,aug,sep.oct,nov,dec,total'
  62. for row in result.values():
  63. print ','.join(map(str, row)) + ',' + str(sum(row[1:]))
  64.  
  65. gawk -F, '
  66. {
  67. plate[$1]++
  68. split($4, dt, /-0*/)
  69. count[$1,dt[2]]++
  70. }
  71. END {
  72. print "plate,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec,total"
  73. n = asorti(plate, ordered_plates)
  74. for (i=1; i<=n; i++) {
  75. p = ordered_plates[i]
  76. printf("%s,", p)
  77. for (m=1; m<=12; m++)
  78. printf("%d,", count[p,m])
  79. print plate[p]
  80. }
  81. }
  82. ' basefile.csv
  83.  
  84. plate,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec,total
  85. AAM7676,0,3,1,0,1,0,0,1,0,0,0,0,6
  86. AAO9229,1,0,0,0,0,0,0,0,0,0,0,0,1
  87. AAP0678,0,0,0,1,1,1,0,0,0,0,0,0,3
  88. AAU6100,2,1,1,0,0,0,0,0,0,0,0,0,4
  89.  
  90. #!/usr/bin/perl
  91. use strict;
  92. use warnings;
  93. use List::Util qw/ sum /;
  94.  
  95. my %data;
  96. while (<DATA>) {
  97. my ($plate, $col4) = (split /,/)[0, 3];
  98. my ($month) = $col4 =~ /-(dd)-/;
  99. $data{$plate}{$month}++;
  100. }
  101.  
  102. print join(",", qw/ plate jan feb mar apr may jun jul aug sep oct nov dec total /), "n";
  103.  
  104. for my $plate (sort keys %data) {
  105. my @per_month = map $data{$plate}{$_} || 0, '01' .. '12';
  106. print join(",", $plate, @per_month, sum @per_month), "n";
  107. }
  108.  
  109. __DATA__
  110. AAM7676,2012-02-02 11:55:52,32,2012-02-03 19:55:30,62,1
  111. AAM7676,2012-02-11 13:56:11,32,2012-02-12 21:00:18,52,2
  112. AAM7676,2012-02-21 16:30:55,32,2012-02-23 13:29:41,62,1
  113. AAM7676,2012-03-07 20:03:32,32,2012-03-09 13:31:35,62,1
  114. AAM7676,2012-05-28 06:08:05,32,2012-05-29 15:49:55,52,2
  115. AAM7676,2012-08-22 12:47:28,32,2012-08-24 08:03:09,52,1
  116. AAO9229,2012-01-10 07:19:29,32,2012-01-11 16:39:16,52,2
  117. AAP0678,2012-04-09 16:35:19,32,2012-04-10 19:46:55,52,2
  118. AAP0678,2012-04-30 16:44:28,32,2012-05-01 19:20:00,52,2
  119. AAP0678,2012-06-01 19:31:34,32,2012-06-03 10:34:33,52,3
  120. AAU6100,2012-01-09 17:49:13,32,2012-01-11 02:00:33,52,3
  121. AAU6100,2012-01-20 21:18:16,32,2012-01-22 14:09:00,52,3
  122. AAU6100,2012-02-20 13:35:39,32,2012-02-21 19:45:55,52,2
  123. AAU6100,2012-03-13 09:50:51,32,2012-03-14 22:35:51,52,3
  124.  
  125. from collections import defaultdict
  126. d = defaultdict(lambda : [None]+[0]*12)
  127.  
  128. with open('yourfile') as f:
  129. for line in f:
  130. plate,_,_,time,_,_ = line.split(',') #maybe use csv instead
  131. month = int(time.split('-')[1]) #get the month
  132. d[plate][month] += 1
  133.  
  134. SELECT
  135. plate,
  136. COUNT(IF(MONTH(dt2) = 1, 1, NULL)) jan,
  137. COUNT(IF(MONTH(dt2) = 2, 1, NULL)) feb,
  138. COUNT(IF(MONTH(dt2) = 3, 1, NULL)) mar,
  139. COUNT(IF(MONTH(dt2) = 4, 1, NULL)) apr,
  140. COUNT(IF(MONTH(dt2) = 5, 1, NULL)) may,
  141. COUNT(*) total
  142. FROM
  143. basefile_table
  144. WHERE
  145. YEAR(dt2) = 2012
  146. GROUP BY
  147. plate;
  148.  
  149. +---------+-----+-----+-----+-----+-----+-------+
  150. | plate | jan | feb | mar | apr | may | total |
  151. +---------+-----+-----+-----+-----+-----+-------+
  152. | AAM7676 | 0 | 3 | 1 | 0 | 1 | 6 |
  153. | AAO9229 | 1 | 0 | 0 | 0 | 0 | 1 |
  154. | AAP0678 | 0 | 0 | 0 | 1 | 1 | 3 |
  155. | AAU6100 | 2 | 1 | 1 | 0 | 0 | 4 |
  156. +---------+-----+-----+-----+-----+-----+-------+
  157.  
  158. cut -d, -f1,4 basefile.csv |
  159. sed 's/,2012-([0-9][0-9])-[0-9][0-9] ..:..:..$/ 1/'
  160.  
  161. AAM7676 02
  162. AAM7676 02
  163. AAM7676 02
  164. AAM7676 03
  165. AAM7676 05
  166. AAM7676 08
  167. AAO9229 01
  168. AAP0678 04
  169. AAP0678 05
  170. AAP0678 06
  171. AAU6100 01
  172. AAU6100 01
  173. AAU6100 02
  174. AAU6100 03
  175.  
  176. ... |
  177. sort | uniq -c
  178.  
  179. 3 AAM7676 02
  180. 1 AAM7676 03
  181. 1 AAM7676 05
  182. 1 AAM7676 08
  183. 1 AAO9229 01
  184. 1 AAP0678 04
  185. 1 AAP0678 05
  186. 1 AAP0678 06
  187. 2 AAU6100 01
  188. 1 AAU6100 02
  189. 1 AAU6100 03
  190.  
  191. cut -d, -f1,4 basefile.csv |
  192. sed 's/,2012-([0-9][0-9])-[0-9][0-9] ..:..:..$/ 1/' |
  193. sort |
  194. uniq -c |
  195. awk '
  196. { if ($2 != last_plate && last_plate != "")
  197. {
  198. printf "%s", last_plate
  199. for (i = 1; i <= 12; i++)
  200. {
  201. printf ",%d", count[i]
  202. count[i] = 0;
  203. }
  204. print ""
  205. }
  206. last_plate = $2
  207. count[$3+0] = $1
  208. }
  209. END { if (last_plate != "")
  210. {
  211. printf "%s", last_plate
  212. for (i = 1; i <= 12; i++)
  213. printf ",%d", count[i]
  214. print ""
  215. }
  216. }'
  217.  
  218. AAM7676,0,3,1,0,1,0,0,1,0,0,0,0
  219. AAO9229,1,0,0,0,0,0,0,0,0,0,0,0
  220. AAP0678,0,0,0,1,1,1,0,0,0,0,0,0
  221. AAU6100,2,1,1,0,0,0,0,0,0,0,0,0
  222.  
  223. drop table if exists toto;
  224. create table toto(
  225. plate VARCHAR(32),
  226. date1 DATETIME,
  227. something1 INT(10),
  228. date2 DATETIME,
  229. something2 INT(10),
  230. something3 INT(10)
  231. );
  232.  
  233. INSERT INTO toto VALUES('AAM7676','2012-02-02 11:55:52',32,'2012-02-03 19:55:30',62,1);
  234. INSERT INTO toto VALUES('AAM7676','2012-02-11 13:56:11',32,'2012-02-12 21:00:18',52,2);
  235. INSERT INTO toto VALUES('AAM7676','2012-02-21 16:30:55',32,'2012-02-23 13:29:41',62,1);
  236. INSERT INTO toto VALUES('AAM7676','2012-03-07 20:03:32',32,'2012-03-09 13:31:35',62,1);
  237. INSERT INTO toto VALUES('AAM7676','2012-05-28 06:08:05',32,'2012-05-29 15:49:55',52,2);
  238. INSERT INTO toto VALUES('AAM7676','2012-08-22 12:47:28',32,'2012-08-24 08:03:09',52,1);
  239. INSERT INTO toto VALUES('AAO9229','2012-01-10 07:19:29',32,'2012-01-11 16:39:16',52,2);
  240. INSERT INTO toto VALUES('AAP0678','2012-04-09 16:35:19',32,'2012-04-10 19:46:55',52,2);
  241. INSERT INTO toto VALUES('AAP0678','2012-04-30 16:44:28',32,'2012-05-01 19:20:00',52,2);
  242. INSERT INTO toto VALUES('AAP0678','2012-06-01 19:31:34',32,'2012-06-03 10:34:33',52,3);
  243. INSERT INTO toto VALUES('AAU6100','2012-01-09 17:49:13',32,'2012-01-11 02:00:33',52,3);
  244. INSERT INTO toto VALUES('AAU6100','2012-01-20 21:18:16',32,'2012-01-22 14:09:00',52,3);
  245. INSERT INTO toto VALUES('AAU6100','2012-02-20 13:35:39',32,'2012-02-21 19:45:55',52,2);
  246. INSERT INTO toto VALUES('AAU6100','2012-03-13 09:50:51',32,'2012-03-14 22:35:51',52,3);
  247.  
  248.  
  249. SELECT
  250. t.plate,
  251. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=1),
  252. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=2),
  253. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=3),
  254. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=4),
  255. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=5),
  256. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=6),
  257. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=7),
  258. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=8),
  259. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=9),
  260. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=10),
  261. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=11),
  262. (SELECT COUNT(*) FROM toto tt WHERE tt.plate=t.plate AND EXTRACT(MONTH FROM date1)=12),
  263. COUNT(*)
  264. FROM toto t
  265. GROUP BY plate;
  266.  
  267. AAM7676 0 3 1 0 1 0 0 1 0 0 0 0 6
  268. AAO9229 1 0 0 0 0 0 0 0 0 0 0 0 1
  269. AAP0678 0 0 0 2 0 1 0 0 0 0 0 0 3
  270. AAU6100 2 1 1 0 0 0 0 0 0 0 0 0 4
Add Comment
Please, Sign In to add comment