Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Export the rule list from the uMatrix extension to a file, then set its path here\n",
- "path_to_file = '/Users/username/Downloads/my-umatrix-rules.txt'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Read the exported rules: one tuple of space-separated fields per line.\n",
- "# Later cells unpack every tuple into (orig, source, scope, rule), so blank\n",
- "# lines are skipped here instead of turning into one-element tuples.\n",
- "with open(path_to_file, 'r') as fh:\n",
- "    list_of_rules = [\n",
- "        tuple(line.rstrip('\\n').split(' '))\n",
- "        for line in fh\n",
- "        if line.strip()\n",
- "    ]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# The first 50 rules of the export are the defaults; keep only the custom ones.\n",
- "# NOTE: this rebinds list_of_rules, so re-running this cell without first\n",
- "# re-running the loading cell drops another 50 rules.\n",
- "N_DEFAULT_RULES = 50\n",
- "list_of_rules = list_of_rules[N_DEFAULT_RULES:]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Put all custom site rules into a DataFrame, keeping only the 'allow' rules\n",
- "import pandas as pd\n",
- "df = (\n",
- "    pd.DataFrame(list_of_rules, columns=('orig', 'source', 'scope', 'rule'))\n",
- "    .loc[lambda frame: frame['rule'] == 'allow']\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# The same source domains are whitelisted over and over\n",
- "df['source'].value_counts()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Collapse all rules regardless of origin: every rule except 'block' gets the\n",
- "# wildcard origin '*', while 'block' rules keep their original origin.\n",
- "set_of_uni_rules = {\n",
- "    (orig if rule == 'block' else '*', source, scope, rule)\n",
- "    for (orig, source, scope, rule) in list_of_rules\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Aggregate sub-domains under their base domain for .com/.net sites\n",
- "set_of_unique_rules = set()\n",
- "for full_rule in set_of_uni_rules:\n",
- "    orig, domain, cont, rule = full_rule\n",
- "    dms = domain.split('.')\n",
- "    # Collapse only hosts with more than two labels that do not start with\n",
- "    # 'www' and end in .com or .net; everything else is kept unchanged.\n",
- "    # NOTE(review): 'block' rules are collapsed as well, which widens them to\n",
- "    # the whole base domain -- confirm this is intended.\n",
- "    if len(dms) > 2 and dms[0] != 'www' and dms[-1] in ('com', 'net'):\n",
- "        new_domain = '.'.join(dms[-2:])\n",
- "        print(\"Replacing domain '{}' for '{}'\".format(domain,new_domain))\n",
- "        set_of_unique_rules.add((orig, new_domain, cont, rule))\n",
- "    else:\n",
- "        set_of_unique_rules.add(full_rule)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Double-check that the rules make sense\n",
- "set_of_unique_rules"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Write the merged rules to a file that can be imported into the extension\n",
- "with open(\"/tmp/new-umatrix-rules.txt\", 'w') as wfh:\n",
- "    # Sets iterate in arbitrary order; sort so the output file is the same\n",
- "    # on every run and can be diffed against earlier exports.\n",
- "    wfh.write('\\n'.join(' '.join(t) for t in sorted(set_of_unique_rules)))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment