Advertisement
Guest User

Untitled

a guest
Feb 20th, 2016
1,280
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 25.63 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "collapsed": false
  8. },
  9. "outputs": [
  10. {
  11. "name": "stdout",
  12. "output_type": "stream",
  13. "text": [
  14. "Populating the interactive namespace from numpy and matplotlib\n"
  15. ]
  16. }
  17. ],
  18. "source": [
  19. "import matplotlib.pyplot as plt\n",
  20. "import pandas as pd\n",
  21. "import numpy as np\n",
  22. "import plotly.plotly as py\n",
  23. "\n",
  24. "%pylab inline"
  25. ]
  26. },
  27. {
  28. "cell_type": "code",
  29. "execution_count": 11,
  30. "metadata": {
  31. "collapsed": true
  32. },
  33. "outputs": [],
  34. "source": [
  35. "from pyspark.sql import *\n",
  36. "from pyspark.sql.functions import *\n",
  37. "import itertools\n",
  38. "import datetime"
  39. ]
  40. },
  41. {
  42. "cell_type": "code",
  43. "execution_count": 3,
  44. "metadata": {
  45. "collapsed": true
  46. },
  47. "outputs": [],
  48. "source": [
  "# NOTE(review): sqlContext is not created in this notebook; presumably the kernel provides it.\n",
  "LONGITUDINAL_PATH = \"s3://telemetry-parquet/longitudinal/v20160212\"\n",
  "frame = sqlContext.read.load(LONGITUDINAL_PATH)"
  50. ]
  51. },
  52. {
  53. "cell_type": "code",
  54. "execution_count": 8,
  55. "metadata": {
  56. "collapsed": false
  57. },
  58. "outputs": [],
  59. "source": [
  "# Narrow the frame to the start-date and add-on columns used by the cells below.\n",
  "wanted_columns = (\"info.subsessionStartDate\", \"activeAddons\")\n",
  "subselect = frame.selectExpr(*wanted_columns)"
  61. ]
  62. },
  63. {
  64. "cell_type": "code",
  65. "execution_count": 21,
  66. "metadata": {
  67. "collapsed": false
  68. },
  69. "outputs": [
  70. {
  71. "data": {
  72. "text/plain": [
  73. "MapType(StringType,StructType(List(StructField(blocklisted,BooleanType,true),StructField(description,StringType,true),StructField(name,StringType,true),StructField(userDisabled,BooleanType,true),StructField(appDisabled,BooleanType,true),StructField(version,StringType,true),StructField(scope,IntegerType,true),StructField(type,StringType,true),StructField(foreignInstall,BooleanType,true),StructField(hasBinaryComponents,BooleanType,true),StructField(installDay,LongType,true),StructField(updateDay,LongType,true),StructField(signedState,IntegerType,true))),false)"
  74. ]
  75. },
  76. "execution_count": 21,
  77. "metadata": {},
  78. "output_type": "execute_result"
  79. }
  80. ],
  81. "source": [
  "# Element type of the activeAddons array column; used below as the UDF's return type.\n",
  "activeAddonsField = [field for field in subselect.schema.fields if field.name == 'activeAddons'][0]\n",
  "activeAddonsType = activeAddonsField.dataType.elementType\n",
  "activeAddonsType"
  84. ]
  85. },
  86. {
  87. "cell_type": "code",
  88. "execution_count": 45,
  89. "metadata": {
  90. "collapsed": true
  91. },
  92. "outputs": [],
  93. "source": [
  "def find_first_recent_activeAddons(subsessionStartDate, activeAddons, cutoff=datetime.date(2016, 1, 18)):\n",
  "    \"\"\"Return the first non-null activeAddons entry dated on or after `cutoff`.\n",
  "\n",
  "    subsessionStartDate and activeAddons are walked together as parallel sequences\n",
  "    (presumably one element per subsession), in stored order. Entries whose date is\n",
  "    missing or does not parse as YYYY-MM-DD are skipped.\n",
  "\n",
  "    Returns None when either input is None or when no entry qualifies.\n",
  "    \"\"\"\n",
  "    if subsessionStartDate is None or activeAddons is None:\n",
  "        return None\n",
  "    # Distinct loop names: the parameters are no longer rebound while iterating.\n",
  "    for startDate, addons in itertools.izip(subsessionStartDate, activeAddons):\n",
  "        if startDate is None or addons is None:\n",
  "            # A None date would raise TypeError on slicing, which the\n",
  "            # ValueError handler below would not catch.\n",
  "            continue\n",
  "        try:\n",
  "            started = datetime.datetime.strptime(startDate[:10], \"%Y-%m-%d\").date()\n",
  "        except ValueError:\n",
  "            continue\n",
  "        if started >= cutoff:\n",
  "            return addons\n",
  "    return None\n",
  "\n",
  "find_first_recent_activeAddons_udf = udf(find_first_recent_activeAddons, activeAddonsType)"
  105. ]
  106. },
  107. {
  108. "cell_type": "code",
  109. "execution_count": 48,
  110. "metadata": {
  111. "collapsed": false
  112. },
  113. "outputs": [],
  114. "source": [
  "# Apply the UDF to every row, then drop the rows for which it returned NULL.\n",
  "recentAddonsColumn = find_first_recent_activeAddons_udf(subselect.subsessionStartDate, subselect.activeAddons)\n",
  "lastActiveAddons = (subselect\n",
  "    .select(recentAddonsColumn.alias(\"lastActiveAddons\"))\n",
  "    .where(\"lastActiveAddons IS NOT NULL\"))"
  116. ]
  117. },
  118. {
  119. "cell_type": "code",
  120. "execution_count": 84,
  121. "metadata": {
  122. "collapsed": true
  123. },
  124. "outputs": [],
  125. "source": [
  126. "from collections import Counter\n",
  127. "\n",
  "class KeyedAccumulator(pyspark.AccumulatorParam):\n",
  "    \"\"\"Accumulator parameter whose value is a Counter of per-key tallies.\n",
  "\n",
  "    addInPlace accepts either another Counter, which is added to the running\n",
  "    value, or a single (key, amount) pair, which bumps one key.\n",
  "    \"\"\"\n",
  "\n",
  "    def zero(self, v):\n",
  "        # The initial value only has to be a Counter; the zero element is always empty.\n",
  "        assert isinstance(v, Counter), \"Expected Counter, got %r\" % (v,)\n",
  "        return Counter()\n",
  "\n",
  "    def addInPlace(self, v1, v2):\n",
  "        assert isinstance(v1, Counter)\n",
  "        if not isinstance(v2, Counter):\n",
  "            key, amount = v2\n",
  "            v1[key] += amount\n",
  "        else:\n",
  "            v1 += v2\n",
  "        return v1"
  141. ]
  142. },
  143. {
  144. "cell_type": "code",
  145. "execution_count": null,
  146. "metadata": {
  147. "collapsed": false
  148. },
  149. "outputs": [],
  150. "source": [
  "def _keyed_accumulator():\n",
  "    \"\"\"Create an empty Counter accumulator backed by a fresh KeyedAccumulator.\"\"\"\n",
  "    return sc.accumulator(Counter(), accum_param=KeyedAccumulator())\n",
  "\n",
  "# Counter-valued accumulators, keyed by add-on ID or by (id, name, version, signedState).\n",
  "common_addons_by_id = _keyed_accumulator()\n",
  "common_unsigned_by_id = _keyed_accumulator()\n",
  "addon_map = _keyed_accumulator()\n",
  "\n",
  "# Plain integer counters, one per row outcome.\n",
  "total_nulladdons = sc.accumulator(0)\n",
  "total_inactive = sc.accumulator(0)\n",
  "total_active_nodata = sc.accumulator(0)\n",
  "total_active_data = sc.accumulator(0)\n",
  "total_active_no_addons = sc.accumulator(0)\n",
  159. "\n",
  "def map_activeAddons(activeAddons):\n",
  "    \"\"\"Tally one activeAddons map into the accumulators.\n",
  "\n",
  "    Always bumps total_active_data. An empty map only bumps\n",
  "    total_active_no_addons. Otherwise every add-on is counted by ID, by\n",
  "    (id, name, version, signedState), and additionally as unsigned when its\n",
  "    signedState is below 1.\n",
  "    \"\"\"\n",
  "    total_active_data.add(1)\n",
  "    if not len(activeAddons):\n",
  "        total_active_no_addons.add(1)\n",
  "        return\n",
  "\n",
  "    for addonId, addon in activeAddons.iteritems():\n",
  "        versioned_key = (addonId, addon.name, addon.version, addon.signedState)\n",
  "        addon_map.add((versioned_key, 1))\n",
  "        common_addons_by_id.add((addonId, 1))\n",
  "        if addon.signedState < 1:\n",
  "            common_unsigned_by_id.add((addonId, 1))\n",
  171. "\n",
  "def my_accumulator(row, cutoff=datetime.date(2016, 1, 11)):\n",
  "    \"\"\"Classify one row and bump the matching accumulator.\n",
  "\n",
  "    - row.activeAddons is None: total_nulladdons.\n",
  "    - Otherwise subsessions are walked in stored order, skipping missing or\n",
  "      unparseable dates and stopping at the first date before `cutoff`. The first\n",
  "      subsession seen with non-null add-on data is handed to map_activeAddons.\n",
  "    - If dated subsessions on/after the cutoff were seen but none had add-on\n",
  "      data: total_active_nodata.\n",
  "    - Otherwise: total_inactive. A row without a date array lands here too.\n",
  "\n",
  "    NOTE(review): the early break assumes subsessions are ordered newest first,\n",
  "    and the nesting of the add-on check inside the loop was reconstructed from a\n",
  "    paste that lost its indentation; confirm both.\n",
  "    \"\"\"\n",
  "    if row.activeAddons is None:\n",
  "        total_nulladdons.add(1)\n",
  "        return\n",
  "\n",
  "    isActive = False\n",
  "    # `or []` keeps izip from raising TypeError when the date array itself is None.\n",
  "    for startDate, addons in itertools.izip(row.subsessionStartDate or [], row.activeAddons):\n",
  "        if startDate is None:\n",
  "            # Slicing None raises TypeError, which the ValueError handler would miss.\n",
  "            continue\n",
  "        try:\n",
  "            started = datetime.datetime.strptime(startDate[:10], \"%Y-%m-%d\").date()\n",
  "        except ValueError:\n",
  "            continue\n",
  "        if started < cutoff:\n",
  "            break\n",
  "        isActive = True\n",
  "\n",
  "        if addons is not None:\n",
  "            map_activeAddons(addons)\n",
  "            return\n",
  "\n",
  "    if isActive:\n",
  "        total_active_nodata.add(1)\n",
  "    else:\n",
  "        total_inactive.add(1)\n",
  "\n",
  "subselect.rdd.foreach(my_accumulator)"
  196. ]
  197. },
  198. {
  199. "cell_type": "code",
  200. "execution_count": 139,
  201. "metadata": {
  202. "collapsed": false
  203. },
  204. "outputs": [],
  205. "source": [
  206. "import __builtin__\n",
  "addon_table = [(addonId, name, version, signedState, count) for ((addonId, name, version, signedState), count) in addon_map.value.items()]\n",
  "\n",
  "# Index the reported names per add-on ID once, so name_for_id does not rescan\n",
  "# the whole addon_table on every call.\n",
  "addon_names_by_id = {}\n",
  "for _id, _name, _version, _signedState, _count in addon_table:\n",
  "    addon_names_by_id.setdefault(_id, Counter())[_name] += _count\n",
  "\n",
  "def name_for_id(addonId):\n",
  "    \"\"\"Return a display name for addonId built from the names reported for it.\n",
  "\n",
  "    The most frequently reported name is used. The runner-up is appended with\n",
  "    \" and \" when it accounts for more than 20% of the reports. When even the top\n",
  "    name is below 20%, \" and N others\" is appended instead, N being the number\n",
  "    of remaining names.\n",
  "\n",
  "    Raises IndexError for an ID that has no entry in addon_table.\n",
  "    \"\"\"\n",
  "    ranked = addon_names_by_id.get(addonId, Counter()).most_common()\n",
  "    # __builtin__.sum: the bare name `sum` is shadowed by the star imports above.\n",
  "    total = float(__builtin__.sum([count for _, count in ranked]))\n",
  "    name = ranked[0][0]\n",
  "    if len(ranked) > 1 and ranked[1][1] / total > 0.2:\n",
  "        name += \" and \" + ranked[1][0]\n",
  "    if ranked[0][1] / total < 0.2:\n",
  "        # Leading space added: the suffix used to be glued onto the name.\n",
  "        name += \" and {} others\".format(len(ranked) - 1)\n",
  "    return name"
  223. ]
  224. },
  225. {
  226. "cell_type": "code",
  227. "execution_count": 140,
  228. "metadata": {
  229. "collapsed": false
  230. },
  231. "outputs": [
  232. {
  233. "name": "stdout",
  234. "output_type": "stream",
  235. "text": [
  236. "Users with no addons: 48.8%\n",
  237. "\n",
  238. "Top 100 enabled addons:\n",
  239. "12.6%: {d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d} (Adblock Plus)\n",
  240. "7.2%: {82AF8DCA-6DE9-405D-BD5E-43525BDAD38A} (Skype)\n",
  241. "6.9%: firefox-hotfix@mozilla.org (Firefox Hotfix: Fix the migration to clear passwords on shutdown and Firefox Application Reputation lookups)\n",
  242. "2.9%: wrc@avast.com (Avast Online Security)\n",
  243. "2.7%: {b9db16a4-6edc-47ec-a1f4-b86292ed211d} (Video DownloadHelper)\n",
  244. "2.3%: {4ED1F68A-5463-4931-9384-8FFF5ED91D92} (McAfee WebAdvisor)\n",
  245. "2.3%: cpmanager@mozillaonline.com (附加组件管理器)\n",
  246. "2.2%: cehomepage@mozillaonline.com (火狐主页)\n",
  247. "2.2%: tabtweak@mozillaonline.com (标签页优化)\n",
  248. "2.2%: easyscreenshot@mozillaonline.com (网页截图)\n",
  249. "2.1%: commonfix@mozillaonline.com (火狐修复工具)\n",
  250. "2.1%: coba@mozilla.com.cn (网银支付助手)\n",
  251. "2.0%: wx-assistant@mozillaonline.com (微信网页版助手)\n",
  252. "1.8%: vb@yandex.ru (Визуальные закладки)\n",
  253. "1.8%: yasearch@yandex.ru (Элементы Яндекса)\n",
  254. "1.3%: abs@avira.com (Avira Browserschutz and Avira Browser Safety)\n",
  255. "1.2%: firebug@software.joehewitt.com (Firebug)\n",
  256. "1.2%: mozilla_cc2@internetdownloadmanager.com (IDM integration)\n",
  257. "1.1%: sovetnik@metabar.ru (Советник Яндекс.Маркета)\n",
  258. "0.9%: light_plugin_D772DC8D6FAF43A29B25C4EBAA5AD1DE@kaspersky.com (Kaspersky Protection)\n",
  259. "0.9%: sp@avast.com (Avast SafePrice)\n",
  260. "0.8%: {a38384b3-2d1d-4f36-bc22-0f7ae402bcd7} (Визуальные закладки @Mail.Ru)\n",
  261. "0.8%: firefox@mega.co.nz (MEGA)\n",
  262. "0.7%: content_blocker_663BE84DBCC949E88C7600F63CA7F098@kaspersky.com (Dangerous Websites Blocker and Modul zum Sperren von gefährlichen Webseiten)\n",
  263. "0.7%: virtual_keyboard_07402848C2F6470194F131B0F3DE025E@kaspersky.com (Virtual Keyboard and Virtuelle Tastatur)\n",
  264. "0.7%: {DDC359D1-844A-42a7-9AA1-88A850A938A8} (DownThemAll!)\n",
  265. "0.7%: adblockpopups@jessehakanen.net (Adblock Plus Pop-up Addon)\n",
  266. "0.6%: homepage@mail.ru (Домашняя страница Mail.Ru)\n",
  267. "0.6%: search@mail.ru (Поиск@Mail.Ru)\n",
  268. "0.6%: {635abd67-4fe9-1b23-4f01-e679fa7484c1} (Yahoo! Toolbar)\n",
  269. "0.6%: firefox@ghostery.com (Ghostery)\n",
  270. "0.6%: {b9bfaf1c-a63f-47cd-8b9a-29526ced9060} (Download YouTube Videos as MP4)\n",
  271. "0.6%: avg@toolbar (AVG Web TuneUp)\n",
  272. "0.6%: helper-sig@savefrom.net (SaveFrom.net - helper and SaveFrom.net помощник)\n",
  273. "0.6%: {73a6fe31-595d-460b-a920-fcc0f8843232} (NoScript)\n",
  274. "0.6%: artur.dubovoy@gmail.com (Flash Video Downloader - YouTube HD Download [4K])\n",
  275. "0.6%: {e4a8a97b-f2ed-450b-b12d-ee082ba24781} (Greasemonkey)\n",
  276. "0.6%: adbhelper@mozilla.org (ADB Helper)\n",
  277. "0.6%: {a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7} (WOT)\n",
  278. "0.6%: {C1A2A613-35F1-4FCF-B27F-2840527B6556} (Norton Identity Safe)\n",
  279. "0.5%: online_banking_08806E753BE44495B44E90AA2513BDC5@kaspersky.com (Safe Money and Sicherer Zahlungsverkehr)\n",
  280. "0.5%: content_blocker@kaspersky.com (Dangerous Websites Blocker)\n",
  281. "0.5%: fxdevtools-adapters@mozilla.org (Valence)\n",
  282. "0.4%: feca4b87-3be4-43da-a1b1-137c24220968@jetpack (YouTube Video and Audio Downloader)\n",
  283. "0.4%: support@lastpass.com (LastPass)\n",
  284. "0.4%: {19503e42-ca3c-4c27-b1e2-9cdb2170ee34} (FlashGot)\n",
  285. "0.4%: YoutubeDownloader@PeterOlayev.com (1-Click YouTube Video Downloader)\n",
  286. "0.4%: translator@zoli.bod (Google Translator for Firefox)\n",
  287. "0.4%: _65Members_@download.fromdoctopdf.com (FromDocToPDF)\n",
  288. "0.4%: {dc572301-7619-498c-a57d-39143191b318} (Tab Mix Plus)\n",
  289. "0.4%: elemhidehelper@adblockplus.org (Element Hiding Helper для Adblock Plus)\n",
  290. "0.4%: LVD-SAE@iacsearchandmedia.com (iLivid)\n",
  291. "0.4%: testpilot@labs.mozilla.com (Test Pilot)\n",
  292. "0.4%: {bee6eb20-01e0-ebd1-da83-080329fb9a3a} (Flash and Video Download)\n",
  293. "0.4%: anti_banner@kaspersky.com (Anti-Banner)\n",
  294. "0.3%: virtual_keyboard@kaspersky.com (Virtual Keyboard and Virtuelle Tastatur)\n",
  295. "0.3%: online_banking@kaspersky.com (Safe Money and Sicherer Zahlungsverkehr)\n",
  296. "0.3%: jid1-YcMV6ngYmQRA2w@jetpack (Pin It button)\n",
  297. "0.3%: info@youtube-mp3.org (YouTube mp3)\n",
  298. "0.3%: url_advisor@kaspersky.com (Kaspersky URL Advisor and Modul zur Link-Untersuchung)\n",
  299. "0.3%: deskCutv2@gmail.com (FirefixTab)\n",
  300. "0.3%: client@anonymox.net (anonymoX)\n",
  301. "0.3%: jid1-F9UJ2thwoAm5gQ@jetpack (Lightbeam)\n",
  302. "0.3%: {a7c6cf7f-112c-4500-a7ea-39801a327e5f} (FireFTP)\n",
  303. "0.3%: {195A3098-0BD5-4e90-AE22-BA1C540AFD1E} (Garmin Communicator)\n",
  304. "0.3%: firefoxdav@icloud.com (iCloud Bookmarks)\n",
  305. "0.3%: {1018e4d6-728f-4b20-ad56-37578a4de76b} (Flagfox)\n",
  306. "0.3%: bingsearch.full@microsoft.com (Bing Search)\n",
  307. "0.3%: abb@amazon.com (Amazon Assistant for Firefox)\n",
  308. "0.3%: uBlock0@raymondhill.net (uBlock Origin)\n",
  309. "0.3%: web2pdfextension@web2pdf.adobedotcom (Adobe Acrobat - Create PDF and Adobe Acrobat DC - Create PDF)\n",
  310. "0.3%: yahooprotected@gmail.com (YahooToolsProtected)\n",
  311. "0.3%: mailcheck@web.de (WEB.DE MailCheck)\n",
  312. "0.3%: {fe272bd1-5f76-4ea4-8501-a05d35d823fc} (Adblock Edge)\n",
  313. "0.3%: quicklaunch@mozillaonline.com (火狐捷径)\n",
  314. "0.3%: ols@f-secure.com (Browsing Protection by F-Secure)\n",
  315. "0.3%: cliqz@cliqz.com (Cliqz)\n",
  316. "0.3%: {c45c406e-ab73-11d8-be73-000a95be3b12} (Web Developer)\n",
  317. "0.3%: _dzMembers_@www.pconverter.com (PConverter)\n",
  318. "0.2%: anttoolbar@ant.com (Ant Video Downloader)\n",
  319. "0.2%: _4zMembers_@www.videodownloadconverter.com (VideoDownloadConverter)\n",
  320. "0.2%: WebProtection@360safe.com (360 Internet Protection and Защита от веб-угроз 360)\n",
  321. "0.2%: {E0B8C461-F8FB-49b4-8373-FE32E9252800} (Evernote Web Clipper)\n",
  322. "0.2%: jid1-HAV2inXAnQPIeA@jetpack (YouTube™ Flash® Player)\n",
  323. "0.2%: _39Members_@www.mapsgalaxy.com (MapsGalaxy)\n",
  324. "0.2%: {d40f5e7b-d2cf-4856-b441-cc613eeffbe3} (BetterPrivacy)\n",
  325. "0.2%: virtual_keyboard_294FF26A1D5B455495946778FDE7CEDB@kaspersky.com (Virtual Keyboard and Virtuelle Tastatur)\n",
  326. "0.2%: content_blocker_6418E0D362104DADA084DC312DFA8ABC@kaspersky.com (Dangerous Websites Blocker and Modul zum Sperren von gefährlichen Webseiten)\n",
  327. "0.2%: {37964A3C-4EE8-47b1-8321-34DE2C39BA4D} (Спутник @Mail.Ru)\n",
  328. "0.2%: safesearch@avira.com (Avira SafeSearch)\n",
  329. "0.2%: default_newtabff@gmail.com (Default NewTab)\n",
  330. "0.2%: {5384767E-00D9-40E9-B72F-9CC39D655D6F} (EPUBReader)\n",
  331. "0.2%: {1BC9BA34-1EED-42ca-A505-6D2F1A935BBB} (IE Tab 2 (FF 3.6+))\n",
  332. "0.2%: {b9acf540-acba-11e1-8ccb-001fd0e08bd4} (Easy Youtube Video Downloader Express)\n",
  333. "0.2%: personas@christopher.beard (Personas Plus)\n",
  334. "0.2%: firefox@zenmate.com (ZenMate Security, Privacy & Unblock VPN)\n",
  335. "0.2%: s3google@translator (S3.Google Translator and S3.Google Переводчик)\n",
  336. "0.2%: {3d7eb24f-2740-49df-8937-200b1cc08f8a} (Flashblock)\n",
  337. "0.2%: {20a82645-c095-46ed-80e3-08825760534b} (Microsoft .NET Framework Assistant)\n",
  338. "0.2%: jid1-4P0kohSJxU1qGg@jetpack (Hola Better Internet)\n",
  339. "\n",
  340. "Top 100 unsigned addons:\n",
  341. "0.9%: light_plugin_D772DC8D6FAF43A29B25C4EBAA5AD1DE@kaspersky.com (Kaspersky Protection)\n",
  342. "0.7%: content_blocker_663BE84DBCC949E88C7600F63CA7F098@kaspersky.com (Dangerous Websites Blocker and Modul zum Sperren von gefährlichen Webseiten)\n",
  343. "0.7%: virtual_keyboard_07402848C2F6470194F131B0F3DE025E@kaspersky.com (Virtual Keyboard and Virtuelle Tastatur)\n",
  344. "0.5%: online_banking_08806E753BE44495B44E90AA2513BDC5@kaspersky.com (Safe Money and Sicherer Zahlungsverkehr)\n",
  345. "0.5%: content_blocker@kaspersky.com (Dangerous Websites Blocker)\n",
  346. "0.4%: anti_banner@kaspersky.com (Anti-Banner)\n",
  347. "0.3%: virtual_keyboard@kaspersky.com (Virtual Keyboard and Virtuelle Tastatur)\n",
  348. "0.3%: online_banking@kaspersky.com (Safe Money and Sicherer Zahlungsverkehr)\n",
  349. "0.3%: url_advisor@kaspersky.com (Kaspersky URL Advisor and Modul zur Link-Untersuchung)\n",
  350. "0.3%: deskCutv2@gmail.com (FirefixTab)\n",
  351. "0.3%: yahooprotected@gmail.com (YahooToolsProtected)\n",
  352. "0.2%: virtual_keyboard_294FF26A1D5B455495946778FDE7CEDB@kaspersky.com (Virtual Keyboard and Virtuelle Tastatur)\n",
  353. "0.2%: content_blocker_6418E0D362104DADA084DC312DFA8ABC@kaspersky.com (Dangerous Websites Blocker and Modul zum Sperren von gefährlichen Webseiten)\n",
  354. "0.2%: default_newtabff@gmail.com (Default NewTab)\n",
  355. "0.2%: online_banking_69A4E213815F42BD863D889007201D82@kaspersky.com (Safe Money and Sicherer Zahlungsverkehr)\n",
  356. "0.1%: {1B33E42F-EF14-4cd3-B6DC-174571C4349C} (Thunder Extension)\n",
  357. "0.1%: mg.mail.yahoo.com@services.mozilla.org (Yahoo Mail)\n",
  358. "0.1%: {635abd67-4fe9-1b23-4f01-e679fa7484c1} (Yahoo! Toolbar)\n",
  359. "0.1%: {20a82645-c095-46ed-80e3-08825760534b} (Microsoft .NET Framework Assistant)\n",
  360. "0.1%: wrc@avast.com (Avast Online Security)\n",
  361. "0.1%: vb@yandex.ru (Визуальные закладки)\n",
  362. "0.1%: {87F8774F-B485-47E2-A755-A40A8A5E886D} (GBBD Caixa Economica Federal)\n",
  363. "0.1%: {82AF8DCA-6DE9-405D-BD5E-43525BDAD38A} (Skype)\n",
  364. "0.1%: web2pdfextension@web2pdf.adobedotcom (Adobe Acrobat - Create PDF and Adobe Acrobat DC - Create PDF)\n",
  365. "0.1%: defsearchp@gmail.com (Default SearchProtected)\n",
  366. "0.1%: search@mail.ru (Поиск@Mail.Ru)\n",
  367. "0.1%: {C7AE725D-FA5C-4027-BB4C-787EF9F8248A} (RelevantKnowledge and PremierOpinion)\n",
  368. "0.1%: plus.google.com@services.mozilla.org (Google+)\n",
  369. "0.1%: bingsearch.full@microsoft.com (Bing Search)\n",
  370. "0.1%: jqs@sun.com (Java Quick Starter)\n",
  371. "0.1%: {d720d64d-c71a-4316-b59e-8a41b860178f} (Quick Searcher)\n",
  372. "0.1%: yasearch@yandex.ru (Элементы Яндекса)\n",
  373. "0.1%: arthurj8283@gmail.com (xRocket Toolbar)\n",
  374. "0.1%: zoteroOpenOfficeIntegration@zotero.org (Zotero LibreOffice Integration)\n",
  375. "0.1%: {87F8774F-B485-47E2-A755-A40A8A5E886C} (GBBD Banco do Brasil)\n",
  376. "0.0%: loop@mozilla.org (Firefox Hello Beta)\n",
  377. "0.0%: {d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d} (Adblock Plus)\n",
  378. "0.0%: {538F0036-F358-4f84-A764-89FB437166B4} (KillFF)\n",
  379. "0.0%: {a00bef25-f21a-4539-adbb-b179b29e2b92} (Video AdBlock for Firefox)\n",
  380. "0.0%: cehomepage@mozillaonline.com (火狐主页)\n",
  381. "0.0%: readystate@openqa.org (DocumentReadyState)\n",
  382. "0.0%: {503A0CD4-EDC8-489b-853B-19E0BAA8F0A4} (Selenium RC Runner)\n",
  383. "0.0%: veggy@veggyAddon.com (Mozilla Firefox Hotfixer)\n",
  384. "0.0%: {B64D9B05-48E1-4CEB-BF58-E0643994E900} (DVDVideoSoft YouTube MP3 and Video Download)\n",
  385. "0.0%: zoteroWinWordIntegration@zotero.org (Zotero Word for Windows Integration)\n",
  386. "0.0%: abs@avira.com (Avira Browserschutz and Avira Browser Safety)\n",
  387. "0.0%: {746505DC-0E21-4667-97F8-72EA6BCF5EEF} (Shopper-Pro and ShopperPro)\n",
  388. "0.0%: {6E727987-C8EA-44DA-8749-310C0FBE3C3E} (Поиск по торрентам and TSearch)\n",
  389. "0.0%: mail.google.com@services.mozilla.org (Gmail)\n",
  390. "0.0%: jid1-G80Ec8LLEbK5fQ@jetpack (New Tab by Yahoo)\n",
  391. "0.0%: {37964A3C-4EE8-47b1-8321-34DE2C39BA4D} (Спутник @Mail.Ru)\n",
  392. "0.0%: 2020Player_IKEA@2020Technologies.com (20-20 3D Viewer - IKEA)\n",
  393. "0.0%: sweetsearch@gmail.com (Search Enginer)\n",
  394. "0.0%: {B100D0FF-0001-8CE4-2790-AACE49B8AE35} (HTML5 location provider)\n",
  395. "0.0%: {D19CA586-DD6C-4a0a-96F8-14644F340D60} (McAfee ScriptScan for Firefox and IDS_SS_NAME)\n",
  396. "0.0%: {3B4DE07A-DE43-4DBC-873F-05835FF67DCE} (The Safe Surfing)\n",
  397. "0.0%: {e4f94d1e-2f53-401e-8885-681602c0ddd8} (McAfee Security Scan Plus)\n",
  398. "0.0%: AVJYFVOD75109374@HCDE39471360.com (CinemaPlus-3.3c)\n",
  399. "0.0%: {4ED1F68A-5463-4931-9384-8FFF5ED91D92} (McAfee WebAdvisor)\n",
  400. "0.0%: jid1-4P0kohSJxU1qGg@jetpack (Hola Better Internet)\n",
  401. "0.0%: {87F8774F-B485-47E2-A755-A40A8A5E8873} (Guardião - Itaú 30 horas)\n",
  402. "0.0%: cpmanager@mozillaonline.com (附加组件管理器)\n",
  403. "0.0%: {87F8774F-B485-47E2-A755-A40A8A5E8874} (GBBD Banco Santander (Brasil) S.A.)\n",
  404. "0.0%: d4db60df25f14dae9dd18@185c395f9e794c9ab86be3eb.com (CinemaPlus-3.2c)\n",
  405. "0.0%: {23fcfd51-4958-4f00-80a3-ae97e717ed8b} (DivX Plus Web Player HTML5 <video>)\n",
  406. "0.0%: toolbar@ask.com (Ask Toolbar)\n",
  407. "0.0%: twitter.com@services.mozilla.org (Twitter)\n",
  408. "0.0%: firefox@mega.co.nz (MEGA)\n",
  409. "0.0%: {F003DA68-8256-4b37-A6C4-350FA04494DF} (Logitech SetPoint)\n",
  410. "0.0%: www.ok.ru@services.mozilla.org (Odnoklassniki)\n",
  411. "0.0%: {c0c9a2c7-2e5c-4447-bc53-97718bc91e1b} (Easy YouTube Video Downloader)\n",
  412. "0.0%: mozilla_cc2@internetdownloadmanager.com (IDM integration)\n",
  413. "0.0%: {71A44B6B-42B9-4111-BD15-E67572E92A4C} (Vision WebLock)\n",
  414. "0.0%: avg@toolbar (AVG Web TuneUp)\n",
  415. "0.0%: 9tffxtbr@free.internetspeedtracker.com (Internet Speed Tracker)\n",
  416. "0.0%: www.goal.com@services.mozilla.org (GOAL)\n",
  417. "0.0%: {195A3098-0BD5-4e90-AE22-BA1C540AFD1E} (Garmin Communicator)\n",
  418. "0.0%: zoteroMacWordIntegration@zotero.org (Zotero Word for Mac Integration)\n",
  419. "0.0%: leethax@leethax.net (leethax.net extension)\n",
  420. "0.0%: LVD-SAE@iacsearchandmedia.com (iLivid)\n",
  421. "0.0%: translator@zoli.bod (Google Translator for Firefox)\n",
  422. "0.0%: www.linkedin.com@services.mozilla.org (LinkedIn)\n",
  423. "0.0%: {4D6A6C8E-1EB2-46e1-8CAA-40DAFDE3ED93} (Address Bar Search)\n",
  424. "0.0%: dmpluginff@westbyte.com (Download Master Plugin)\n",
  425. "0.0%: www.facebook.com@services.mozilla.org (Facebook)\n",
  426. "0.0%: dmremote@westbyte.com (Download Master Remote Download)\n",
  427. "0.0%: testpilot@labs.mozilla.com (Test Pilot)\n",
  428. "0.0%: {b9db16a4-6edc-47ec-a1f4-b86292ed211d} (Video DownloadHelper)\n",
  429. "0.0%: {a6fd85ed-e919-4a43-a5af-8da18bda539f} (Selenium IDE)\n",
  430. "0.0%: dmbarff@westbyte.com (Download Master Toolbar)\n",
  431. "0.0%: e-webprint@epson.com (E-Web Print)\n",
  432. "0.0%: javaformatters@seleniumhq.org (Selenium IDE: Java Formatters)\n",
  433. "0.0%: csharpformatters@seleniumhq.org (Selenium IDE: C# Formatters)\n",
  434. "0.0%: rubyformatters@seleniumhq.org (Selenium IDE: Ruby Formatters)\n",
  435. "0.0%: pythonformatters@seleniumhq.org (Selenium IDE: Python Formatters)\n",
  436. "0.0%: faststartff@gmail.com (Fast Start)\n",
  437. "0.0%: {4d31e4c9-ce86-4b0f-8ffb-56345a8b5f6c} (Everysale.Net)\n",
  438. "0.0%: extension@b5m.com (帮5淘 - 帮5买旗下购物助手)\n",
  439. "0.0%: ffxtlbr@babylon.com (Babylon)\n",
  440. "0.0%: 9tffxtbr@download.internetspeedtracker.com (Internet Speed Tracker)\n"
  441. ]
  442. }
  443. ],
  444. "source": [
  "def print_top_addons(title, ranked, limit=100):\n",
  "    \"\"\"Print `title`, then one line per (addonId, count) pair in ranked[:limit].\n",
  "\n",
  "    Each line shows the count as a share of total_active_data, the add-on ID,\n",
  "    and the display name chosen by name_for_id.\n",
  "    \"\"\"\n",
  "    print title\n",
  "    for addonId, count in ranked[:limit]:\n",
  "        share = float(count) / total_active_data.value\n",
  "        print u\"{:.1%}: {} ({})\".format(share, addonId, name_for_id(addonId))\n",
  "\n",
  "def by_count_descending(counter):\n",
  "    \"\"\"Return the counter's (key, count) pairs sorted by count, largest first.\"\"\"\n",
  "    return sorted(counter.items(), key=lambda a: a[1], reverse=True)\n",
  "\n",
  "print \"Users with no addons: {:.1%}\".format(float(total_active_no_addons.value) / total_active_data.value)\n",
  "print\n",
  "top_addons = by_count_descending(common_addons_by_id.value)\n",
  "print_top_addons(\"Top 100 enabled addons:\", top_addons)\n",
  "\n",
  "print\n",
  "unsigned_addons = by_count_descending(common_unsigned_by_id.value)\n",
  "print_top_addons(\"Top 100 unsigned addons:\", unsigned_addons)\n"
  461. ]
  462. },
  463. {
  464. "cell_type": "code",
  465. "execution_count": 122,
  466. "metadata": {
  467. "collapsed": false
  468. },
  469. "outputs": [
  470. {
  471. "data": {
  472. "text/plain": [
  473. "6"
  474. ]
  475. },
  476. "execution_count": 122,
  477. "metadata": {},
  478. "output_type": "execute_result"
  479. }
  480. ],
  481. "source": [
  482. "import __builtin__\n",
  "# Scratch check that the real builtin sum is reachable via __builtin__; name_for_id relies on it.\n",
  "__builtin__.sum((1, 2, 3))"
  484. ]
  485. },
  486. {
  487. "cell_type": "code",
  488. "execution_count": 69,
  489. "metadata": {
  490. "collapsed": true
  491. },
  492. "outputs": [],
  493. "source": [
  494. "import boto3\n",
  495. "from gzip import GzipFile\n",
  496. "from cStringIO import StringIO\n",
  497. "import csv\n",
  498. "\n",
  "class S3CompressedWriter(object):\n",
  "    \"\"\"Context manager that gzips written data in memory and uploads it to S3.\n",
  "\n",
  "    Entering yields a writable GzipFile. If the with-block finishes without an\n",
  "    exception, the compressed bytes are put to the object `path` in `bucket` with\n",
  "    ContentEncoding 'gzip' and ContentType `mimetype`. If the block raises,\n",
  "    nothing is uploaded and the exception propagates.\n",
  "    \"\"\"\n",
  "\n",
  "    def __init__(self, bucket, path, mimetype='text/plain'):\n",
  "        self.bucket = bucket\n",
  "        self.path = path\n",
  "        self.mimetype = mimetype\n",
  "        self._buffer = None\n",
  "        self._writer = None\n",
  "\n",
  "    def __enter__(self):\n",
  "        self._buffer = StringIO()\n",
  "        self._writer = GzipFile(mode=\"wb\", fileobj=self._buffer)\n",
  "        return self._writer\n",
  "\n",
  "    def __exit__(self, exc_type, exc_value, traceback):\n",
  "        try:\n",
  "            # Close the gzip stream on every path; this also writes the gzip\n",
  "            # trailer needed before the buffer can be uploaded.\n",
  "            self._writer.close()\n",
  "            if exc_value is None:\n",
  "                self._buffer.seek(0)\n",
  "                s3 = boto3.resource('s3')\n",
  "                s3.Object(self.bucket, self.path).put(Body=self._buffer, ContentEncoding='gzip', ContentType=self.mimetype)\n",
  "        finally:\n",
  "            # Release the buffer even if the upload raised, so the check in\n",
  "            # __del__ still holds.\n",
  "            self._buffer = None\n",
  "            self._writer = None\n",
  "\n",
  "    def __del__(self):\n",
  "        # Leak check: a writer that was entered must have been exited.\n",
  "        assert self._buffer is None\n"
  521. ]
  522. },
  523. {
  524. "cell_type": "code",
  525. "execution_count": 75,
  526. "metadata": {
  527. "collapsed": false
  528. },
  529. "outputs": [],
  530. "source": [
  "# NOTE(review): exports top_addons, the count-sorted (addonId, count) list built by the\n",
  "# report cell; the name top_addons_by_id is not defined anywhere in this notebook. Confirm\n",
  "# this is the intended data.\n",
  "with S3CompressedWriter('telemetry-public-analysis-2', 'bsmedberg/20160212-top-addons.csv') as fd:\n",
  "    csvw = csv.writer(fd)\n",
  "    csvw.writerows(top_addons)\n"
  535. ]
  536. },
  537. {
  538. "cell_type": "code",
  539. "execution_count": 74,
  540. "metadata": {
  541. "collapsed": false
  542. },
  543. "outputs": [
  544. {
  545. "data": {
  546. "text/plain": [
  547. "322865"
  548. ]
  549. },
  550. "execution_count": 74,
  551. "metadata": {},
  552. "output_type": "execute_result"
  553. }
  554. ],
  555. "source": [
  "# Count of the most common add-on: first entry of the count-sorted top_addons list.\n",
  "# NOTE(review): top_addons_by_id is not defined in this notebook; top_addons assumed.\n",
  "top_addons[0][1]"
  557. ]
  558. }
  559. ],
  560. "metadata": {
  561. "kernelspec": {
  562. "display_name": "Python 2",
  563. "language": "python",
  564. "name": "python2"
  565. },
  566. "language_info": {
  567. "codemirror_mode": {
  568. "name": "ipython",
  569. "version": 2
  570. },
  571. "file_extension": ".py",
  572. "mimetype": "text/x-python",
  573. "name": "python",
  574. "nbconvert_exporter": "python",
  575. "pygments_lexer": "ipython2",
  576. "version": "2.7.10"
  577. }
  578. },
  579. "nbformat": 4,
  580. "nbformat_minor": 0
  581. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement