Guest User

Untitled

a guest
Sep 24th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.22 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": null,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "# Export rule list from uMatrix extension to a file\n",
  10. "path_to_file = '/Users/username/Downloads/my-umatrix-rules.txt'"
  11. ]
  12. },
  13. {
  14. "cell_type": "code",
  15. "execution_count": null,
  16. "metadata": {},
  17. "outputs": [],
  18. "source": [
  19. "# reading the file with rules\n",
  20. "with open(path_to_file, 'r') as fh:\n",
  21. " list_of_rules = [\n",
  22. " tuple(line.rstrip('\\n').split(' '))\n",
  23. " for line in fh\n",
  24. " ]"
  25. ]
  26. },
  27. {
  28. "cell_type": "code",
  29. "execution_count": null,
  30. "metadata": {},
  31. "outputs": [],
  32. "source": [
  33. "# the top 50 rules are the defaults\n",
  34. "list_of_rules = list_of_rules[50:]"
  35. ]
  36. },
  37. {
  38. "cell_type": "code",
  39. "execution_count": null,
  40. "metadata": {},
  41. "outputs": [],
  42. "source": [
  43. "# let's put all custom site rules into a DataFrame\n",
  44. "import pandas as pd\n",
  45. "df = pd.DataFrame(list_of_rules, columns=('orig', 'source', 'scope', 'rule'))\n",
  46. "df = df[df.rule=='allow']"
  47. ]
  48. },
  49. {
  50. "cell_type": "code",
  51. "execution_count": null,
  52. "metadata": {},
  53. "outputs": [],
  54. "source": [
  55. "# the same source domains are whitelisted over and over\n",
  56. "df.source.value_counts()"
  57. ]
  58. },
  59. {
  60. "cell_type": "code",
  61. "execution_count": null,
  62. "metadata": {},
  63. "outputs": [],
  64. "source": [
  65. "# Collapsing all rules regardless of origin (block rules keep their origin)\n",
  66. "set_of_uni_rules = {\n",
  67. " ('*', source, scope, rule) if rule!='block' else (orig, source, scope, rule)\n",
  68. " for orig, source, scope, rule in list_of_rules\n",
  69. "}"
  70. ]
  71. },
  72. {
  73. "cell_type": "code",
  74. "execution_count": null,
  75. "metadata": {},
  76. "outputs": [],
  77. "source": [
  78. "# let's aggregate base domains for some sites\n",
  79. "set_of_unique_rules = set()\n",
  80. "for full_rule in set_of_uni_rules:\n",
  81. " orig,domain,cont,rule = full_rule\n",
  82. " dms = domain.split('.')\n",
  83. " if len(dms) > 2 and dms[0] != 'www' and dms[-1] in ('com', 'net'):\n",
  84. " new_domain = '.'.join(dms[-2:])\n",
  85. " print(\"Replacing domain '{}' for '{}'\".format(domain,new_domain))\n",
  86. " set_of_unique_rules.add((orig, new_domain, cont, rule))\n",
  87. " else:\n",
  88. " set_of_unique_rules.add(full_rule)"
  89. ]
  90. },
  91. {
  92. "cell_type": "code",
  93. "execution_count": null,
  94. "metadata": {},
  95. "outputs": [],
  96. "source": [
  97. "# Double-check that the rules make sense\n",
  98. "set_of_unique_rules"
  99. ]
  100. },
  101. {
  102. "cell_type": "code",
  103. "execution_count": null,
  104. "metadata": {},
  105. "outputs": [],
  106. "source": [
  107. "# write to file and import into extension\n",
  108. "with open(\"/tmp/new-umatrix-rules.txt\", 'w+') as wfh:\n",
  109. " wfh.write('\\n'.join(' '.join(t) for t in set_of_unique_rules))"
  110. ]
  111. }
  112. ],
  113. "metadata": {
  114. "kernelspec": {
  115. "display_name": "Python 3",
  116. "language": "python",
  117. "name": "python3"
  118. },
  119. "language_info": {
  120. "codemirror_mode": {
  121. "name": "ipython",
  122. "version": 3
  123. },
  124. "file_extension": ".py",
  125. "mimetype": "text/x-python",
  126. "name": "python",
  127. "nbconvert_exporter": "python",
  128. "pygments_lexer": "ipython3",
  129. "version": "3.6.4"
  130. }
  131. },
  132. "nbformat": 4,
  133. "nbformat_minor": 2
  134. }
Add Comment
Please, Sign In to add comment