Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # pg_stat_growth_tables.py - Collects statistics
- # about size growth of tables, saves it to stat files,
- # calculate stat and sends a mail notification that contents of:
- # 1) top tables by size
- # 2) top tables by 1 day growth (with a difference)
- # 3) similar lists for 7 days and 30 days
- #
- # Author: Andrey Klychkov aaklychkov@mail.ru
- # Licence: Copyleft free software
- # Date: 29.03.2018
- #
- # Requirements: psql,
- # run it as the 'postgres' user
- #
- # Important: You must set up desired mail server
- # settings to send mail notifications
- #
- # Usage: ./pg_stat_growth_tables.py DBNAME STAT_DIR
- #
- # Notification's content example:
- # -------------------------------
- # ## Top 20 tables by size (in MB) ##
- # -----------------------------------
- # 121025 test_table1
- # 57025 test_table2
- # 53670 test_table3
- #
- # ## Top tables by growth 1 day (in MB) ##
- # ----------------------------------------------
- # 229 test_table4 3%
- # 211 test_table1 0%
- # 125 test_table1 0%
- # ...
- # Similar lists for 7 and 30 days stat
- from __future__ import print_function
- import datetime
- import os
- import smtplib
- import socket
- import subprocess
- import sys
- from email.mime.multipart import MIMEMultipart
- from email.mime.text import MIMEText
- from pwd import getpwnam
- ########################
- # PARAMETERS BLOCK #
- ########################
- # Check a number of command-line arguments:
- if len(sys.argv) != 3:
- print('Syntax: ./pg_stat_growth_tables.py DBNAME STAT_DIR')
- sys.exit(1)
- # Main params:
- # ------------
- DBNAME = sys.argv[1]
- STAT_DIR = sys.argv[2]+'/'
- # min table size for accounting, in bytes:
- MIN_TBL_SIZE = 1000000000
- # line number for notifications:
- NUM_LINES_REPORT = 20
- # Common params:
- # --------------
- LOCAL_USER = 'postgres'
- HOSTNAME = socket.gethostname()
- NOW = datetime.datetime.now()
- DATE = NOW.strftime('%Y%m%d')
- TIME = NOW.strftime('%Y.%m.%d %H:%M')
- # Mail params:
- # ------------
- SEND_MAIL = 1
- SENDER = 'report.yourcompany@gmail.com'
- RECIPIENT = ['your.mailbox@example.com']
- SMTP_SRV = 'smtp.gmail.com'
- SMTP_PORT = 587
- SMTP_PASS = 'PasswdHere'
- # Mail subject template:
- mail_sbj = '%s: database %s statistic' % (HOSTNAME, DBNAME)
- def send_mail(sbj, ms):
- if SEND_MAIL:
- msg = MIMEMultipart()
- msg['Subject'] = (sbj)
- msg['From'] = 'root@%s' % HOSTNAME
- msg['To'] = RECIPIENT[0]
- body = MIMEText(ms, 'plain')
- msg.attach(body)
- smtpconnect = smtplib.SMTP(SMTP_SRV, SMTP_PORT)
- smtpconnect.starttls()
- smtpconnect.login(SENDER, SMTP_PASS)
- smtpconnect.sendmail(SENDER, RECIPIENT, msg.as_string())
- smtpconnect.quit()
- else:
- pass
- def create_dir(dr):
- try:
- os.mkdir(dr)
- os.chown(dr, getpwnam(LOCAL_USER).pw_uid, getpwnam(LOCAL_USER).pw_gid)
- os.chmod(dr, 0700)
- except Exception as e:
- print(e)
- sys.exit(1)
- def check_dir(dr):
- if not os.path.isdir(dr):
- create_dir(dr)
- def do_shell_cmd(cmd):
- ret = subprocess.Popen(cmd, shell=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT)
- out = ret.stdout.readlines()
- ret.communicate()
- return out
- def do_psql_cmd(dbname, cmd):
- psql_cmd = 'psql %s -t -A -c "%s"' % (dbname, cmd)
- result = do_shell_cmd(psql_cmd)
- return result
- def db_write(db_file, entry):
- '''
- Write size stat from own stat files
- '''
- db = open(db_file, 'a')
- try:
- db.write(entry)
- finally:
- db.close()
- def db_read(db_file):
- '''
- Read size stat from own stat files
- '''
- if not os.path.isfile(db_file):
- with open(db_file, 'w') as f:
- pass
- db = open(db_file, 'r')
- try:
- stat = db.readlines()
- return stat
- finally:
- db.close()
- def growth_per_days(days):
- growth_b = 0
- days_num = len(t_db)
- tm = days - 1
- if days_num < tm:
- t_size_futher_day = int(t_db[0].split(',')[1])
- else:
- t_size_futher_day = int(t_db[-tm].split(',')[1])
- growth_b = t_size - t_size_futher_day
- if t_size_futher_day != 0:
- growth_p = int(growth_b / float(t_size_futher_day) * 100)
- else:
- growth_p = 0
- return (growth_b, growth_p)
- def get_top_by_size(t_list, num):
- size_info = ['## Top %s tables by size (in MB) ##' % num,
- '-----------------------------------']
- for t in t_list[0:num]:
- info = t.split('|')
- name = info[0]
- size = int(float(info[1]) / 1024 / 1024)
- size_info.append('%s %s' % (size, name))
- top_size = '\n'.join(size_info)
- return top_size
- if __name__ == '__main__':
- # Initialize a notification's body:
- report = ['%s\n\n' % TIME] # Report initializing
- # Check script data directory,
- # create it if doesn't exist:
- check_dir(STAT_DIR)
- # Get a table list with size in byte:
- get_tbl_list_with_size = "SELECT c.relname,\
- pg_catalog.pg_total_relation_size(c.oid)\
- FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_namespace n\
- ON n.oid = c.relnamespace WHERE c.relkind IN ('r','')\
- AND n.nspname <> 'pg_catalog' AND n.nspname <> 'information_schema'\
- AND n.nspname !~ '^pg_toast' AND pg_catalog.pg_table_is_visible(c.oid)\
- ORDER BY pg_catalog.pg_total_relation_size(c.oid) DESC;"
- tbl_list_with_size = do_psql_cmd(DBNAME, get_tbl_list_with_size)
- # ####################################
- # For you information: rows in a table
- # stat file in the STAT_DIR contents:
- # list = [0 date,
- # 1 cursize,
- # 2 growth_b_per_1_day,
- # 3 growth_%_per_1_day,
- # 4 growth_b_per_7_days,
- # 5 growth_%_per_7_days,
- # 6 growth_b_per_30_days,
- # 7 growth_%_per_30_days,
- # 8 growth_avrg_per_7_days,
- # 9 growth_avrg_per_30_days]
- ######################################
- for t in tbl_list_with_size:
- t_spl = t.split('|')
- t_name = t_spl[0]
- t_size = int(t_spl[1].rstrip('\n'))
- t_db_file = STAT_DIR+t_name+'.db'
- if int(t_size) >= MIN_TBL_SIZE:
- t_db = db_read(t_db_file)
- # If t_db_file is empty:
- if len(t_db) == 0:
- t_stat_entry = '%s,%s,0,0,0,0,0,0,0,0\n' % (DATE, t_size)
- db_write(t_db_file, t_stat_entry)
- else:
- prev_t_size = int(t_db[-1].split(',')[1])
- # 2 growth_b_per_1_day:
- growth_1_day_b = t_size - prev_t_size
- # 3 growth_%_per_1_day:
- if prev_t_size != 0:
- growth_1_day_p = int(
- growth_1_day_b / float(prev_t_size) * 100)
- else:
- growth_1_day_p = 0
- # 4, 5 growth_per_7_days:
- growth_7_days = growth_per_days(7)
- growth_7_days_b = growth_7_days[0]
- growth_7_days_p = growth_7_days[1]
- # 6, 7 growth_per_30_days:
- growth_30_days = growth_per_days(30)
- growth_30_days_b = growth_30_days[0]
- growth_30_days_p = growth_30_days[1]
- # 8 growth_avrg_per_7_days:
- growth_7_days_avg = growth_7_days_b / 7
- # 9 growth_avrg_per_30_days:
- growth_30_days_avg = growth_30_days_b / 30
- entry_cur_day_stat = '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (
- DATE,
- t_size,
- growth_1_day_b,
- growth_1_day_p,
- growth_7_days_b,
- growth_7_days_p,
- growth_30_days_b,
- growth_30_days_p,
- growth_7_days_avg,
- growth_30_days_avg)
- db_write(t_db_file, entry_cur_day_stat)
- else:
- tbl_list_with_size.remove(t)
- # Generation parts of a notification:
- # 0) Top tables by size:
- top_by_current_size = get_top_by_size(tbl_list_with_size, NUM_LINES_REPORT)
- # 1) Top tables by day's growth:
- top_by_1_day_growth_list = []
- top_by_7_day_growth_list = []
- top_by_30_day_growth_list = []
- for t in tbl_list_with_size:
- t_spl = t.split('|')
- t_name = t_spl[0]
- t_size = int(t_spl[1].rstrip('\n'))
- t_db_file = STAT_DIR+t_name+'.db'
- if int(t_size) >= MIN_TBL_SIZE:
- t_db = db_read(t_db_file)
- growth_1_day_mb = int(float(t_db[-1].split(',')[2]) / 1024 / 1024)
- growth_7_day_mb = int(float(t_db[-1].split(',')[4]) / 1024 / 1024)
- growth_30_day_mb = int(float(t_db[-1].split(',')[6]) / 1024 / 1024)
- growth_1_day_p = t_db[-1].split(',')[3]
- growth_7_day_p = t_db[-1].split(',')[5]
- growth_30_day_p = t_db[-1].split(',')[7]
- if growth_1_day_mb > 0:
- entry_growth1 = "%s %s %s%%\n" % (
- growth_1_day_mb, t_name, growth_1_day_p)
- top_by_1_day_growth_list.append(entry_growth1)
- if growth_7_day_mb > 0:
- entry_growth7 = "%s %s %s%%\n" % (
- growth_7_day_mb, t_name, growth_7_day_p)
- top_by_7_day_growth_list.append(entry_growth7)
- if growth_30_day_mb > 0:
- entry_growth30 = "%s %s %s%%\n" % (
- growth_30_day_mb, t_name, growth_7_day_p)
- top_by_30_day_growth_list.append(entry_growth30)
- # Make a notification:
- top_by_1_day_growth_sorted = sorted(
- top_by_1_day_growth_list, key=lambda x: int(
- x[:x.index(' ')]), reverse=True)
- top_by_7_day_growth_sorted = sorted(
- top_by_7_day_growth_list, key=lambda x: int(
- x[:x.index(' ')]), reverse=True)
- top_by_30_day_growth_sorted = sorted(
- top_by_30_day_growth_list, key=lambda x: int(
- x[:x.index(' ')]), reverse=True)
- report_1 = ''.join(top_by_1_day_growth_sorted[:NUM_LINES_REPORT-1])
- report_7 = ''.join(top_by_7_day_growth_sorted[:NUM_LINES_REPORT-1])
- report_30 = ''.join(top_by_30_day_growth_sorted[:NUM_LINES_REPORT-1])
- delim = (46 * '-')+'\n'
- completed_report = top_by_current_size+'\n'
- completed_report += '\n## Top tables by growth 1 day (in MB) '
- completed_report += '##\n%s%s\n' % (delim, report_1)
- completed_report += '## Top tables by growth 7 days (in MB) '
- completed_report += '##\n%s%s\n' % (delim, report_7)
- completed_report += '## Top tables by growth 30 days (in MB) '
- completed_report += '##\n%s%s' % (delim, report_30)
- # Send a notification:
- mail_sbj = '%s: table growth statistic information' % HOSTNAME
- send_mail(mail_sbj, completed_report)
- sys.exit(0)
Add Comment
Please, Sign In to add comment