Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- import datetime
- from django.db import connection, transaction
- from resources.models import ViewEntry, View
- class BaseGetTaskBackend(object):
- task_sql_template = None
- @staticmethod
- def backend_for_type(type, **kwargs):
- if type == 'yandex':
- return YandexGetTaskBackend(**kwargs)
- elif type == 'google':
- return GoogleGetTaskBackend(**kwargs)
- raise AttributeError("Type value must be 'yandex' or 'google' or 'wordstat' either")
- def __init__(self, request, scope='all', task_id=None):
- self.scope = scope
- self.task_id = task_id
- self.request = request
- def get_account_subscriptions_in_scope(self):
- if self.scope == 'user':
- return self.account_subscriptions_for_search_engine()
- return None
- def get_task(self, account_subscription_ids=None):
- get_task_sql = self.generate_get_task_sql(account_subscription_ids)
- if not get_task_sql:
- return None, None
- cursor = connection.cursor()
- cursor.execute(get_task_sql)
- task = cursor.fetchone() if cursor.rowcount > 0 else None
- description = cursor.description
- cursor.close()
- transaction.commit_unless_managed()
- return description, task
- def account_subscriptions_for_search_engine(self):
- raise NotImplementedError
- def generate_get_task_sql(self, account_subscription_ids):
- task_sql_template = self.get_task_sql_template()
- if not task_sql_template:
- return None
- account_subscription_limiting = ''
- if not account_subscription_ids is None:
- assert isinstance(account_subscription_ids, list), "'account_subscription_ids' variable must be list"
- if len(account_subscription_ids) == 0:
- return None
- account_subscription_limiting = ' and y.id in (%s)' % ','.join(map(str, account_subscription_ids))
- yesterday_task_cond = "and (collected is null or collected < now()-'24 hours'::interval)" if not self.task_id else ''
- task_id_limiting = 'and id = {0}'.format(self.task_id) if self.task_id else ''
- return task_sql_template % {
- 'account_subscription_limiting': account_subscription_limiting,
- 'yesterday_task': yesterday_task_cond,
- 'task_id_limiting': task_id_limiting,
- }
- def get_task_sql_template(self):
- return self.task_sql_template
- class YandexGetTaskBackend(BaseGetTaskBackend):
- task_sql_template = """
- lock table yandex_subscriptions in access exclusive mode;
- create temporary table subscriptions_active
- on commit drop
- as
- select
- y.id id,
- q.querystring querystring,
- r.code region,
- max(a.search_depth) search_depth,
- y.queued,
- y.collected
- from yandex_accounts_subscriptions a
- join yandex_subscriptions y
- on a.yandex_subscription_id=y.id
- join yandex_regions r
- on y.yandex_region_id=r.id
- join queries q
- on y.query_id=q.id
- join resources_subscriptioninterval si
- on si.resource_subscription_id = a.id*8
- where (si.datetime_unsubscribed is null
- or si.datetime_unsubscribed>now())
- %(account_subscription_limiting)s
- group by y.id, q.querystring, r.code;
- create temporary table subscription_next_task
- on commit drop
- as
- select
- id,
- querystring,
- region,
- search_depth
- from subscriptions_active
- where search_depth > 0
- %(yesterday_task)s
- %(task_id_limiting)s
- and (queued is null or queued < now()-'10 minutes'::interval)
- order by collected, id
- limit 1;
- update yandex_subscriptions
- set queued=now()
- where id in (select id from subscription_next_task);
- select * from subscription_next_task;
- """
- def account_subscriptions_for_search_engine(self):
- resource_subscriptions = list(ViewEntry.objects.filter(view__in=(View.objects.filter(
- perms__permission__in=self.request.user.user_permissions.all()).distinct())).
- values_list('subscription', flat=True))
- sql = """
- select distinct rs._id
- from resource_subscription_view rs
- join resources_subscriptioninterval si on si.resource_subscription_id = rs.id
- where rs.id in ({0})
- and (si.datetime_unsubscribed is NULL or si.datetime_unsubscribed>now())
- and (rs.search_engine = 'Y')
- order by rs._id
- """.format(','.join(map(str, resource_subscriptions)))
- cursor = connection.cursor()
- cursor.execute(sql)
- rows = cursor.fetchall()
- return [row[0] for row in rows]
- class GoogleGetTaskBackend(BaseGetTaskBackend):
- task_sql_template = """
- lock table google_subscriptions in access exclusive mode;
- create temporary table subscriptions_active
- on commit drop
- as
- select
- y.id id,
- q.querystring querystring,
- r.gname region,
- r.gurl url,
- max(a.search_depth) search_depth,
- y.queued,
- y.collected
- from google_accounts_subscriptions a
- join google_subscriptions y
- on a.google_subscription_id=y.id
- join google_regions r
- on y.google_region_id=r.id
- join queries q
- on y.query_id=q.id
- join resources_subscriptioninterval si
- on si.resource_subscription_id = a.id*8+1
- where (si.datetime_unsubscribed is null
- or si.datetime_unsubscribed>now())
- %(account_subscription_limiting)s
- group by y.id, q.querystring, r.gurl, r.gname;
- create temporary table subscription_next_task
- on commit drop
- as
- select
- id,
- querystring,
- region,
- url,
- search_depth
- from subscriptions_active
- where search_depth > 0
- %(yesterday_task)s
- %(task_id_limiting)s
- and (queued is null or queued < now()-'10 minutes'::interval)
- order by collected, id
- limit 1;
- update google_subscriptions
- set queued=now()
- where id in (select id from subscription_next_task);
- select * from subscription_next_task;
- """
- def account_subscriptions_for_search_engine(self):
- resource_subscriptions = list(ViewEntry.objects.filter(view__in=(View.objects.filter(
- perms__permission__in=self.request.user.user_permissions.all()).distinct())).
- values_list('subscription', flat=True))
- sql = """
- select distinct rs._id
- from resource_subscription_view rs
- join resources_subscriptioninterval si on si.resource_subscription_id = rs.id
- where rs.id in ({0})
- and (si.datetime_unsubscribed is NULL or si.datetime_unsubscribed>now())
- and (rs.search_engine = 'G')
- order by rs._id
- """.format(','.join(map(str, resource_subscriptions)))
- cursor = connection.cursor()
- cursor.execute(sql)
- rows = cursor.fetchall()
- return [row[0] for row in rows]
- class BaseSetResultBackend(object):
- @staticmethod
- def backend_for_type(type, **kwargs):
- if type == 'yandex':
- return YandexSetResultBackend(**kwargs)
- elif type == 'google':
- return GoogleSetResultBackend(**kwargs)
- # elif type == 'wordstat':
- # return YandexWordstatSetResultBackend(**kwargs)
- raise AttributeError("Type value must be 'yandex' or 'google' or 'wordstat' either")
- def __init__(self, subscription_id):
- self.subscription_id = subscription_id
- def set_results(self, results):
- raise NotImplementedError
- def match_domains(self, site_url, url, subdomain_include):
- import re
- url = url.decode("utf-8")
- domain_name = '.'.join(site_url.split('.')[-2:])
- if subdomain_include == 'strict_domain':
- return bool(re.match(ur'^http(s)?://{0}(:\d+)?/.*$'.format(site_url), url))
- elif subdomain_include == 'domain_with_subdomains':
- return bool(re.match(ur'^http(s)?://(.+\.)?{0}(:\d+)?/.*$'.format(domain_name), url))
- elif subdomain_include == 'only_subdomains':
- return bool(re.match(ur'^http(s)?://.+\.{0}(:\d+)?/.*$'.format(domain_name), url))
- return False
- class YandexSetResultBackend(BaseSetResultBackend):
- def set_results(self, results):
- timestamp = datetime.datetime.now()
- cursor = connection.cursor()
- SQL = """
- INSERT INTO yandex_searchresults (url, position, yandex_subscription_id, timestamp, search_depth)
- VALUES (%(url)s, %(position)s, %(yandex_subscription_id)s, %(timestamp)s, %(search_depth)s)
- """
- values = [
- {
- 'url': url.lower(),
- 'position': i,
- 'yandex_subscription_id': self.subscription_id,
- 'timestamp': timestamp,
- 'search_depth': len(results)
- }
- for i, url
- in enumerate(results, start=1)
- ]
- cursor.executemany(SQL, values)
- transaction.commit_unless_managed()
- cursor.execute("""
- SELECT DISTINCT a.id, a.search_depth, a.subdomain_include, w.hostname
- FROM yandex_accounts_subscriptions a
- JOIN monitoringengine_ui_resource ON monitoringengine_ui_resource.id = a.resource_id
- JOIN websites w ON w.id = monitoringengine_ui_resource.website_id
- JOIN resources_subscriptioninterval si ON si.resource_subscription_id = a.id*8
- WHERE a.yandex_subscription_id=%(yandex_subscription_id)s AND
- (si.datetime_unsubscribed IS NULL OR si.datetime_unsubscribed>now()) AND
- a.id NOT IN (SELECT yandex_account_subscription_id FROM yandex_reports WHERE datestamp=%(timestamp)s::date)
- """, {"yandex_subscription_id": self.subscription_id, 'timestamp': timestamp})
- for yandex_account_subscription_id, search_depth, subdomain_include, hostname in cursor.fetchall():
- filtered = [v for v in values[:search_depth] if self.match_domains(hostname, v['url'], subdomain_include)]
- position = filtered[0]["position"] if len(filtered) else -1
- full_url = filtered[0]["url"] if len(filtered) else ""
- SQL_update = """
- UPDATE yandex_reports SET
- position = %(position)s,
- search_depth = %(search_depth)s,
- full_url = %(full_url)s
- WHERE yandex_account_subscription_id = %(yandex_account_subscription_id)s AND datestamp=%(datestamp)s
- """
- SQL_insert = """
- INSERT INTO yandex_reports (yandex_account_subscription_id, datestamp, position, search_depth, full_url)
- VALUES (%(yandex_account_subscription_id)s, %(datestamp)s, %(position)s, %(search_depth)s, %(full_url)s )
- """
- cursor.execute("""
- SELECT *
- FROM yandex_reports
- WHERE yandex_account_subscription_id = %(yandex_subscription_id)s AND datestamp=%(datestamp)s
- """, {"yandex_subscription_id": self.subscription_id, 'datestamp': timestamp.date()})
- report_values = {
- 'yandex_account_subscription_id': yandex_account_subscription_id,
- 'datestamp': timestamp.date(),
- 'search_depth': min(len(results), search_depth),
- 'position': str(position),
- 'full_url': full_url
- }
- if len(cursor.fetchall()) > 0:
- cursor.execute(SQL_update, report_values)
- transaction.commit_unless_managed()
- else:
- cursor.execute(SQL_insert, report_values)
- transaction.commit_unless_managed()
- sqlSetTaskExecutionTime = \
- """
- update yandex_subscriptions
- set collected = now()
- where id = %(subscription_id)s
- """
- cursor.execute(sqlSetTaskExecutionTime, {"subscription_id": self.subscription_id})
- transaction.commit_unless_managed()
- cursor.close()
- class GoogleSetResultBackend(BaseSetResultBackend):
- def set_results(self, results):
- timestamp = datetime.datetime.now()
- cursor = connection.cursor()
- SQL = """
- INSERT INTO google_searchresults (url, position, google_subscription_id, timestamp, search_depth)
- VALUES (%(url)s, %(position)s, %(google_subscription_id)s, %(timestamp)s, %(search_depth)s)
- """
- values = [
- {
- 'url': url.lower(),
- 'position': i,
- 'google_subscription_id': self.subscription_id,
- 'timestamp': timestamp,
- 'search_depth': len(results)
- }
- for i, url
- in enumerate(results, start=1)
- ]
- cursor.executemany(SQL, values)
- transaction.commit_unless_managed()
- cursor.execute("""
- SELECT DISTINCT a.id, a.search_depth, a.subdomain_include, w.hostname
- FROM google_accounts_subscriptions a
- JOIN resources_subscriptioninterval si ON si.resource_subscription_id = a.id*8+1
- JOIN monitoringengine_ui_resource ON monitoringengine_ui_resource.id = a.resource_id
- JOIN websites w ON w.id = monitoringengine_ui_resource.website_id
- WHERE a.google_subscription_id=%(google_subscription_id)s AND
- (si.datetime_unsubscribed IS NULL OR si.datetime_unsubscribed>now()) AND
- a.id NOT IN (SELECT google_account_subscription_id FROM google_reports WHERE datestamp=%(timestamp)s::date)
- """, {"google_subscription_id": self.subscription_id, 'timestamp': timestamp})
- for google_account_subscription_id, search_depth, subdomain_include, hostname in cursor.fetchall():
- filtered = [v for v in values[:search_depth] if self.match_domains(hostname, v['url'], subdomain_include)]
- position = filtered[0]["position"] if len(filtered) else -1
- full_url = filtered[0]["url"] if len(filtered) else ""
- cursor.execute("""INSERT INTO google_reports (google_account_subscription_id, datestamp, position, search_depth, full_url)
- VALUES (%(google_account_subscription_id)s, %(datestamp)s, %(position)s, %(search_depth)s, %(full_url)s)""",
- {'google_account_subscription_id': google_account_subscription_id,
- 'datestamp': timestamp.date(),
- 'search_depth': min(len(results), search_depth),
- 'position': str(position),
- 'full_url': full_url})
- transaction.commit_unless_managed()
- sqlSetTaskExecutionTime = \
- """
- update google_subscriptions
- set collected = now()
- where id = %(subscription_id)s
- """
- cursor.execute(sqlSetTaskExecutionTime, {"subscription_id": self.subscription_id})
- transaction.commit_unless_managed()
- cursor.close()
- def wordstat_set_results(results):
- timestamp = datetime.datetime.now()
- cursor = connection.cursor()
- SQL = """
- INSERT INTO yandex_wordstat (yandex_subscription_id, timestamp, common_quantity)
- VALUES (%(yandex_subscription_id)s, %(timestamp)s, %(common_quantity)s)
- """
- values = [
- {
- 'yandex_subscription_id': result["sub_id"],
- 'timestamp': timestamp,
- 'common_quantity': result["quantity"]
- }
- for result in results
- ]
- print values
- cursor.executemany(SQL, values)
- transaction.commit_unless_managed()
- cursor.close()
- def wordstat_task():
- result={}
- timestamp = datetime.datetime.now().date()
- cursor = connection.cursor()
- cursor.execute("""
- SELECT q.querystring, r.code from yandex_subscriptions s
- JOIN queries q ON q.id = s.query_id
- JOIN yandex_regions r ON r.id = s.yandex_region_id
- JOIN yandex_wordstat w ON s.id = w.yandex_subscription_id
- where to_char(w.timestamp,'YYYY-MM-DD') != %(timestamp)s
- """, {'timestamp': timestamp.strftime('%Y-%m-%d')})
- if cursor.rowcount > 0:
- row = cursor.fetchone()
- result['query'] = row[0]
- result['region'] = int(row[1])
- return result
- else:
- return 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement