Google.com-Ergebnisse nicht in .de ermitteln

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
########################################################################
#
# google-hidden.py
# Find results hidden by google, but accessible by other search engines.
#
# Copy? Right! 2014 Elias Schwerdtfeger, http://www.tamagothi.de/
#
# This program is free software, licensed under the terms of the pirate's
# license. You can do with it whatever you want, as long as you do not
# sue me. If you want to use this program and to sue me for it, please
# buy a commercial license. You can read the full terms of the license
# (in german language) at http://www.tamagothi.de/impressum/lizenz/
#
# Share and enjoy!
#
# $Id: google-hidden.py,v 1.2 2014/05/31 15:50:55 elias Exp $
#
# (All helpful comments are intentionally removed.)
#
########################################################################

# Sent as the ``num`` query parameter of every search request built by
# GoogleCommon.common_search_part() (presumably the number of results
# requested per query).
RESULTS = 200

import sys
import urllib.parse
import urllib.request
import html.parser


class LinkExtractor(html.parser.HTMLParser):
    """Collect the ``href`` values of all ``<a>`` start tags of a document.

    The document is parsed right in the constructor.  Afterwards ``links``
    holds the collected hrefs in document order, duplicates included.
    """

    def __init__(self, htmldoc):
        """Parse the string *htmldoc* and fill ``self.links``."""
        super().__init__()
        # Must exist before feed() starts firing handle_starttag().
        self.links = []
        self.feed(htmldoc)

    def handle_starttag(self, tag, attrs):
        """Record the href attribute(s) of an ``<a>`` tag."""
        if tag != 'a':
            return
        # A valueless attribute (``<a href>``) is reported with the value
        # None.  Such an entry is not a link and would break the string
        # tests the search classes apply to every collected href.
        self.links.extend(value for name, value in attrs
                          if name == 'href' and value is not None)


class BaseSearchResult(object):
    """Run a search on construction and keep the resulting links.

    Subclasses have to override perform_search(); they may override
    filter_link() and postprocess_links() as well.  After construction
    ``result_links`` is the sorted list of what postprocess_links()
    returned for the filtered, duplicate-free search results.
    """

    def __init__(self, search_term):
        """Search for *search_term* and fill ``self.result_links``."""
        super().__init__()
        self.result_links = []
        seen = set()  # constant-time duplicate check
        for uri in self.perform_search(search_term):
            if self.filter_link(uri) and uri not in seen:
                seen.add(uri)
                self.result_links.append(uri)
        self.result_links = sorted(self.postprocess_links(self.result_links))

    def filter_link(self, uri):
        """Return True if *uri* should be kept; the default keeps all."""
        return True

    def perform_search(self, search_term):
        """Return an iterable of raw links found for *search_term*.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError(
            '{} does not implement perform_search()'.format(
                type(self).__name__))

    def postprocess_links(self, linklist):
        """Return the final link list; the default returns *linklist*."""
        return linklist

    def get_links_from_uri(self, uri):
        """Fetch *uri* and return the hrefs of all ``<a>`` tags in it.

        Errors raised by urllib.request.urlopen() are passed through.
        """
        request = urllib.request.Request(uri)
        request.add_header('User-agent', 'Mozilla/5.0')
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(request) as response:
            charset = response.headers.get_content_charset() or 'utf-8'
            raw = response.read()
        # Only the links matter, so undecodable bytes are replaced instead
        # of aborting the whole search.
        try:
            text = raw.decode(charset, errors='replace')
        except LookupError:
            # The server announced a charset Python does not know.
            text = raw.decode('utf-8', errors='replace')
        return LinkExtractor(text).links


class GoogleCommon(BaseSearchResult):
    """Shared request building and link clean-up for the Google classes."""

    def common_search_part(self, domain, search_term):
        """Query ``http://<domain>/search`` and return the hrefs found."""
        query = urllib.parse.urlencode({'q': search_term, 'num': RESULTS})
        return self.get_links_from_uri(
            'http://{}/search?{}'.format(domain, query))

    def postprocess_links(self, linklist):
        """Return the first ``q`` query parameter of every link having one.

        Links without a ``q`` parameter are dropped, and so are those whose
        ``q`` value contains no ``//``.
        """
        # A missing ``q`` falls back to '', which never contains '//'.
        first_q_values = (
            urllib.parse.parse_qs(
                urllib.parse.urlparse(href).query).get('q', [''])[0]
            for href in linklist)
        # Requiring '//' is a hack to keep only complete URIs.
        return [target for target in first_q_values if '//' in target]


class GoogleDe(GoogleCommon):
    """Search results as delivered by www.google.de."""

    def perform_search(self, search_term):
        """Return the hrefs of the www.google.de result page."""
        domain = 'www.google.de'
        return self.common_search_part(domain, search_term)

    def filter_link(self, uri):
        """Return False for hrefs naming a Google host or '/search...'."""
        own_hosts = ('google.de', 'google.com', 'googleusercontent')
        if any(host in uri for host in own_hosts):
            return False
        return not uri.startswith('/search')


class GoogleCom(GoogleCommon):
    """Search results as delivered by www.google.com."""

    def perform_search(self, search_term):
        """Return the hrefs of the www.google.com result page."""
        domain = 'www.google.com'
        return self.common_search_part(domain, search_term)

    def filter_link(self, uri):
        """Return False for hrefs naming a Google host or '/search...'."""
        own_hosts = ('google.com', 'googleusercontent')
        if any(host in uri for host in own_hosts):
            return False
        return not uri.startswith('/search')


class Yahoo(BaseSearchResult):
    """Stub for a Yahoo search.

    It defines no perform_search() of its own, so creating an instance
    fails until one is added.
    """


class Bing(BaseSearchResult):
    """Stub for a Bing search.

    It defines no perform_search() of its own, so creating an instance
    fails until one is added.
    """


def not_found_in_google_de(term, engines=(GoogleCom, )):
    """Return the sorted links other engines find but GoogleDe does not.

    *term* is searched with GoogleDe first and then with every class in
    *engines*, in the given order.  Each link appears once in the result.
    """
    known = set(GoogleDe(term).result_links)
    missing = set()
    for engine_class in engines:
        missing.update(set(engine_class(term).result_links) - known)
    return sorted(missing)


def main():
    """Print every link that is missing from the google.de results.

    The command-line arguments are lower-cased and joined into one search
    phrase.  Without arguments a usage message is written to stderr and
    the program exits with a non-zero status.
    """
    words = [arg.lower() for arg in sys.argv[1:]]
    if not words:
        sys.exit('usage: {} SEARCH_TERM...'.format(sys.argv[0]))
    # Hand over the plain phrase: the search classes URL-encode the term
    # themselves when building the request, so quoting it here as well
    # would encode it twice ('+' -> '%2B', '%' -> '%25').
    for uri in not_found_in_google_de(' '.join(words)):
        print(uri)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()