Advertisement
Guest User

backends.py

a guest
May 5th, 2015
260
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 17.88 KB | None | 0 0
  1. # coding: utf-8
  2. import datetime
  3. from django.db import connection, transaction
  4. from resources.models import ViewEntry, View
  5.  
  6.  
  7. class BaseGetTaskBackend(object):
  8.     task_sql_template = None
  9.  
  10.     @staticmethod
  11.     def backend_for_type(type, **kwargs):
  12.         if type == 'yandex':
  13.             return YandexGetTaskBackend(**kwargs)
  14.         elif type == 'google':
  15.             return GoogleGetTaskBackend(**kwargs)
  16.  
  17.         raise AttributeError("Type value must be 'yandex' or 'google' or 'wordstat' either")
  18.  
  19.     def __init__(self, request, scope='all', task_id=None):
  20.         self.scope = scope
  21.         self.task_id = task_id
  22.         self.request = request
  23.  
  24.     def get_account_subscriptions_in_scope(self):
  25.         if self.scope == 'user':
  26.             return self.account_subscriptions_for_search_engine()
  27.  
  28.         return None
  29.  
  30.     def get_task(self, account_subscription_ids=None):
  31.         get_task_sql = self.generate_get_task_sql(account_subscription_ids)
  32.  
  33.         if not get_task_sql:
  34.             return None, None
  35.  
  36.         cursor = connection.cursor()
  37.         cursor.execute(get_task_sql)
  38.  
  39.         task = cursor.fetchone() if cursor.rowcount > 0 else None
  40.         description = cursor.description
  41.  
  42.         cursor.close()
  43.         transaction.commit_unless_managed()
  44.  
  45.         return description, task
  46.  
  47.     def account_subscriptions_for_search_engine(self):
  48.         raise NotImplementedError
  49.  
  50.     def generate_get_task_sql(self, account_subscription_ids):
  51.         task_sql_template = self.get_task_sql_template()
  52.  
  53.         if not task_sql_template:
  54.             return None
  55.  
  56.         account_subscription_limiting = ''
  57.  
  58.         if not account_subscription_ids is None:
  59.             assert isinstance(account_subscription_ids, list), "'account_subscription_ids' variable must be list"
  60.  
  61.             if len(account_subscription_ids) == 0:
  62.                 return None
  63.  
  64.             account_subscription_limiting = ' and y.id in (%s)' % ','.join(map(str, account_subscription_ids))
  65.  
  66.         yesterday_task_cond = "and (collected is null or collected < now()-'24 hours'::interval)" if not self.task_id else ''
  67.         task_id_limiting = 'and id = {0}'.format(self.task_id) if self.task_id else ''
  68.         return task_sql_template % {
  69.             'account_subscription_limiting': account_subscription_limiting,
  70.             'yesterday_task': yesterday_task_cond,
  71.             'task_id_limiting': task_id_limiting,
  72.         }
  73.  
  74.     def get_task_sql_template(self):
  75.         return self.task_sql_template
  76.  
  77.  
  78. class YandexGetTaskBackend(BaseGetTaskBackend):
  79.     task_sql_template = """
  80.                lock table yandex_subscriptions in access exclusive mode;
  81.  
  82.                create temporary table subscriptions_active
  83.                on commit drop
  84.                as
  85.                    select
  86.                        y.id id,
  87.                        q.querystring querystring,
  88.                        r.code region,
  89.                        max(a.search_depth) search_depth,
  90.                        y.queued,
  91.                        y.collected
  92.                    from yandex_accounts_subscriptions a
  93.                    join yandex_subscriptions y
  94.                        on a.yandex_subscription_id=y.id
  95.                    join yandex_regions r
  96.                        on y.yandex_region_id=r.id
  97.                    join queries q
  98.                        on y.query_id=q.id
  99.                    join resources_subscriptioninterval si
  100.                        on si.resource_subscription_id = a.id*8
  101.                    where (si.datetime_unsubscribed is null
  102.                        or si.datetime_unsubscribed>now())
  103.                        %(account_subscription_limiting)s
  104.                    group by y.id, q.querystring, r.code;
  105.  
  106.                create temporary table subscription_next_task
  107.                on commit drop
  108.                as
  109.                    select
  110.                        id,
  111.                        querystring,
  112.                        region,
  113.                        search_depth
  114.                    from subscriptions_active
  115.                    where search_depth > 0
  116.                        %(yesterday_task)s
  117.                        %(task_id_limiting)s
  118.                        and (queued is null or queued < now()-'10 minutes'::interval)
  119.                    order by collected, id
  120.                    limit 1;
  121.  
  122.                update yandex_subscriptions
  123.                set queued=now()
  124.                where id in (select id from subscription_next_task);
  125.  
  126.                select * from subscription_next_task;
  127.            """
  128.  
  129.  
  130.     def account_subscriptions_for_search_engine(self):
  131.         resource_subscriptions = list(ViewEntry.objects.filter(view__in=(View.objects.filter(
  132.             perms__permission__in=self.request.user.user_permissions.all()).distinct())).
  133.                                       values_list('subscription', flat=True))
  134.  
  135.         sql = """
  136.        select distinct rs._id
  137.        from resource_subscription_view rs
  138.            join resources_subscriptioninterval si on si.resource_subscription_id = rs.id
  139.        where rs.id in ({0})
  140.          and (si.datetime_unsubscribed is NULL or si.datetime_unsubscribed>now())
  141.          and (rs.search_engine = 'Y')
  142.        order by rs._id
  143.        """.format(','.join(map(str, resource_subscriptions)))
  144.  
  145.         cursor = connection.cursor()
  146.         cursor.execute(sql)
  147.  
  148.         rows = cursor.fetchall()
  149.         return [row[0] for row in rows]
  150.  
  151.  
  152. class GoogleGetTaskBackend(BaseGetTaskBackend):
  153.     task_sql_template = """
  154.                lock table google_subscriptions in access exclusive mode;
  155.  
  156.                create temporary table subscriptions_active
  157.                on commit drop
  158.                as
  159.                    select
  160.                        y.id id,
  161.                        q.querystring querystring,
  162.                        r.gname region,
  163.                        r.gurl url,
  164.                        max(a.search_depth) search_depth,
  165.                        y.queued,
  166.                        y.collected
  167.                    from google_accounts_subscriptions a
  168.                    join google_subscriptions y
  169.                        on a.google_subscription_id=y.id
  170.                    join google_regions r
  171.                        on y.google_region_id=r.id
  172.                    join queries q
  173.                        on y.query_id=q.id
  174.                    join resources_subscriptioninterval si
  175.                        on si.resource_subscription_id = a.id*8+1
  176.                    where (si.datetime_unsubscribed is null
  177.                        or si.datetime_unsubscribed>now())
  178.                        %(account_subscription_limiting)s
  179.                    group by y.id, q.querystring, r.gurl, r.gname;
  180.  
  181.                create temporary table subscription_next_task
  182.                on commit drop
  183.                as
  184.                    select
  185.                        id,
  186.                        querystring,
  187.                        region,
  188.                        url,
  189.                        search_depth
  190.                    from subscriptions_active
  191.                    where search_depth > 0
  192.                        %(yesterday_task)s
  193.                        %(task_id_limiting)s
  194.                        and (queued is null or queued < now()-'10 minutes'::interval)
  195.                    order by collected, id
  196.                    limit 1;
  197.  
  198.                update google_subscriptions
  199.                set queued=now()
  200.                where id in (select id from subscription_next_task);
  201.  
  202.                select * from subscription_next_task;
  203.            """
  204.  
  205.     def account_subscriptions_for_search_engine(self):
  206.         resource_subscriptions = list(ViewEntry.objects.filter(view__in=(View.objects.filter(
  207.             perms__permission__in=self.request.user.user_permissions.all()).distinct())).
  208.                                       values_list('subscription', flat=True))
  209.  
  210.         sql = """
  211.        select distinct rs._id
  212.        from resource_subscription_view rs
  213.          join resources_subscriptioninterval si on si.resource_subscription_id = rs.id
  214.        where rs.id in ({0})
  215.          and (si.datetime_unsubscribed is NULL or si.datetime_unsubscribed>now())
  216.          and (rs.search_engine = 'G')
  217.        order by rs._id
  218.        """.format(','.join(map(str, resource_subscriptions)))
  219.  
  220.         cursor = connection.cursor()
  221.         cursor.execute(sql)
  222.  
  223.         rows = cursor.fetchall()
  224.         return [row[0] for row in rows]
  225.  
  226.  
  227. class BaseSetResultBackend(object):
  228.     @staticmethod
  229.     def backend_for_type(type, **kwargs):
  230.         if type == 'yandex':
  231.             return YandexSetResultBackend(**kwargs)
  232.         elif type == 'google':
  233.             return GoogleSetResultBackend(**kwargs)
  234.         # elif type == 'wordstat':
  235.         #     return YandexWordstatSetResultBackend(**kwargs)
  236.  
  237.         raise AttributeError("Type value must be 'yandex' or 'google' or 'wordstat' either")
  238.  
  239.     def __init__(self, subscription_id):
  240.         self.subscription_id = subscription_id
  241.  
  242.     def set_results(self, results):
  243.         raise NotImplementedError
  244.  
  245.     def match_domains(self, site_url, url, subdomain_include):
  246.         import re
  247.  
  248.         url = url.decode("utf-8")
  249.         domain_name = '.'.join(site_url.split('.')[-2:])
  250.  
  251.         if subdomain_include == 'strict_domain':
  252.             return bool(re.match(ur'^http(s)?://{0}(:\d+)?/.*$'.format(site_url), url))
  253.         elif subdomain_include == 'domain_with_subdomains':
  254.             return bool(re.match(ur'^http(s)?://(.+\.)?{0}(:\d+)?/.*$'.format(domain_name), url))
  255.         elif subdomain_include == 'only_subdomains':
  256.             return bool(re.match(ur'^http(s)?://.+\.{0}(:\d+)?/.*$'.format(domain_name), url))
  257.  
  258.         return False
  259.  
  260.  
  261. class YandexSetResultBackend(BaseSetResultBackend):
  262.     def set_results(self, results):
  263.         timestamp = datetime.datetime.now()
  264.         cursor = connection.cursor()
  265.         SQL = """
  266.            INSERT INTO yandex_searchresults (url, position, yandex_subscription_id, timestamp, search_depth)
  267.            VALUES (%(url)s, %(position)s, %(yandex_subscription_id)s, %(timestamp)s, %(search_depth)s)
  268.        """
  269.         values = [
  270.             {
  271.                 'url': url.lower(),
  272.                 'position': i,
  273.                 'yandex_subscription_id': self.subscription_id,
  274.                 'timestamp': timestamp,
  275.                 'search_depth': len(results)
  276.             }
  277.             for i, url
  278.             in enumerate(results, start=1)
  279.         ]
  280.         cursor.executemany(SQL, values)
  281.         transaction.commit_unless_managed()
  282.         cursor.execute("""
  283.            SELECT DISTINCT a.id, a.search_depth, a.subdomain_include, w.hostname
  284.            FROM yandex_accounts_subscriptions a
  285.            JOIN monitoringengine_ui_resource ON monitoringengine_ui_resource.id = a.resource_id
  286.            JOIN websites w ON w.id = monitoringengine_ui_resource.website_id
  287.            JOIN resources_subscriptioninterval si ON si.resource_subscription_id = a.id*8
  288.            WHERE a.yandex_subscription_id=%(yandex_subscription_id)s AND
  289.                (si.datetime_unsubscribed IS NULL OR si.datetime_unsubscribed>now()) AND
  290.                a.id NOT IN (SELECT yandex_account_subscription_id FROM yandex_reports WHERE datestamp=%(timestamp)s::date)
  291.            """, {"yandex_subscription_id": self.subscription_id, 'timestamp': timestamp})
  292.         for yandex_account_subscription_id, search_depth, subdomain_include, hostname in cursor.fetchall():
  293.             filtered = [v for v in values[:search_depth] if self.match_domains(hostname, v['url'], subdomain_include)]
  294.             position = filtered[0]["position"] if len(filtered) else -1
  295.             full_url = filtered[0]["url"] if len(filtered) else ""
  296.             SQL_update = """
  297.                UPDATE yandex_reports SET
  298.                    position = %(position)s,
  299.                    search_depth = %(search_depth)s,
  300.                    full_url = %(full_url)s
  301.                WHERE yandex_account_subscription_id = %(yandex_account_subscription_id)s AND datestamp=%(datestamp)s
  302.            """
  303.             SQL_insert = """
  304.                INSERT INTO yandex_reports (yandex_account_subscription_id, datestamp, position, search_depth, full_url)
  305.                VALUES (%(yandex_account_subscription_id)s, %(datestamp)s, %(position)s, %(search_depth)s, %(full_url)s )
  306.            """
  307.             cursor.execute("""
  308.                SELECT  *
  309.                FROM yandex_reports
  310.                WHERE yandex_account_subscription_id = %(yandex_subscription_id)s AND datestamp=%(datestamp)s
  311.            """, {"yandex_subscription_id": self.subscription_id, 'datestamp': timestamp.date()})
  312.             report_values = {
  313.                 'yandex_account_subscription_id': yandex_account_subscription_id,
  314.                 'datestamp': timestamp.date(),
  315.                 'search_depth': min(len(results), search_depth),
  316.                 'position': str(position),
  317.                 'full_url': full_url
  318.             }
  319.            
  320.             if len(cursor.fetchall()) > 0:
  321.                 cursor.execute(SQL_update, report_values)
  322.                 transaction.commit_unless_managed()
  323.             else:
  324.                 cursor.execute(SQL_insert, report_values)
  325.                 transaction.commit_unless_managed()
  326.         sqlSetTaskExecutionTime = \
  327.             """
  328.            update yandex_subscriptions
  329.            set collected = now()
  330.            where id = %(subscription_id)s
  331.            """
  332.         cursor.execute(sqlSetTaskExecutionTime, {"subscription_id": self.subscription_id})
  333.         transaction.commit_unless_managed()
  334.         cursor.close()
  335.  
  336.  
  337. class GoogleSetResultBackend(BaseSetResultBackend):
  338.     def set_results(self, results):
  339.         timestamp = datetime.datetime.now()
  340.  
  341.         cursor = connection.cursor()
  342.         SQL = """
  343.            INSERT INTO google_searchresults (url, position, google_subscription_id, timestamp, search_depth)
  344.            VALUES (%(url)s, %(position)s, %(google_subscription_id)s, %(timestamp)s, %(search_depth)s)
  345.        """
  346.         values = [
  347.             {
  348.                 'url': url.lower(),
  349.                 'position': i,
  350.                 'google_subscription_id': self.subscription_id,
  351.                 'timestamp': timestamp,
  352.                 'search_depth': len(results)
  353.             }
  354.             for i, url
  355.             in enumerate(results, start=1)
  356.         ]
  357.         cursor.executemany(SQL, values)
  358.         transaction.commit_unless_managed()
  359.  
  360.         cursor.execute("""
  361.            SELECT DISTINCT a.id, a.search_depth, a.subdomain_include, w.hostname
  362.            FROM google_accounts_subscriptions a
  363.            JOIN resources_subscriptioninterval si ON si.resource_subscription_id = a.id*8+1
  364.            JOIN monitoringengine_ui_resource ON monitoringengine_ui_resource.id = a.resource_id
  365.            JOIN websites w ON w.id = monitoringengine_ui_resource.website_id
  366.            WHERE a.google_subscription_id=%(google_subscription_id)s AND
  367.                (si.datetime_unsubscribed IS NULL OR si.datetime_unsubscribed>now()) AND
  368.                a.id NOT IN (SELECT google_account_subscription_id FROM google_reports WHERE datestamp=%(timestamp)s::date)
  369.            """, {"google_subscription_id": self.subscription_id, 'timestamp': timestamp})
  370.         for google_account_subscription_id, search_depth, subdomain_include, hostname in cursor.fetchall():
  371.             filtered = [v for v in values[:search_depth] if self.match_domains(hostname, v['url'], subdomain_include)]
  372.             position = filtered[0]["position"] if len(filtered) else -1
  373.             full_url = filtered[0]["url"] if len(filtered) else ""
  374.  
  375.             cursor.execute("""INSERT INTO google_reports (google_account_subscription_id, datestamp, position, search_depth, full_url)
  376.                VALUES (%(google_account_subscription_id)s, %(datestamp)s, %(position)s, %(search_depth)s, %(full_url)s)""",
  377.                            {'google_account_subscription_id': google_account_subscription_id,
  378.                             'datestamp': timestamp.date(),
  379.                             'search_depth': min(len(results), search_depth),
  380.                             'position': str(position),
  381.                             'full_url': full_url})
  382.             transaction.commit_unless_managed()
  383.  
  384.         sqlSetTaskExecutionTime = \
  385.             """
  386.        update google_subscriptions
  387.        set collected = now()
  388.        where id = %(subscription_id)s
  389.        """
  390.  
  391.         cursor.execute(sqlSetTaskExecutionTime, {"subscription_id": self.subscription_id})
  392.         transaction.commit_unless_managed()
  393.  
  394.         cursor.close()
  395.  
  396.  
  397. def wordstat_set_results(results):
  398.     timestamp = datetime.datetime.now()
  399.     cursor = connection.cursor()
  400.     SQL = """
  401.        INSERT INTO yandex_wordstat (yandex_subscription_id, timestamp, common_quantity)
  402.        VALUES (%(yandex_subscription_id)s, %(timestamp)s, %(common_quantity)s)
  403.    """
  404.     values = [
  405.         {
  406.             'yandex_subscription_id': result["sub_id"],
  407.             'timestamp': timestamp,
  408.             'common_quantity': result["quantity"]
  409.         }
  410.         for result in results
  411.     ]
  412.     print values
  413.     cursor.executemany(SQL, values)
  414.     transaction.commit_unless_managed()
  415.     cursor.close()
  416.  
  417. def wordstat_task():
  418.     result={}
  419.     timestamp = datetime.datetime.now().date()
  420.     cursor = connection.cursor()
  421.     cursor.execute("""
  422.        SELECT q.querystring, r.code from yandex_subscriptions s
  423.        JOIN queries q ON q.id = s.query_id
  424.        JOIN yandex_regions r ON r.id = s.yandex_region_id
  425.        JOIN yandex_wordstat w ON s.id = w.yandex_subscription_id
  426.        where to_char(w.timestamp,'YYYY-MM-DD') != %(timestamp)s
  427.    """, {'timestamp': timestamp.strftime('%Y-%m-%d')})
  428.     if cursor.rowcount > 0:
  429.         row = cursor.fetchone()
  430.         result['query'] = row[0]
  431.         result['region'] = int(row[1])
  432.         return result
  433.  
  434.     else:
  435.         return 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement