Guest User

Untitled

a guest
Apr 6th, 2018
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.52 KB | None | 0 0
  1. #!/usr/bin/python
  2. # pg_stat_growth_tables.py - Collects statistics
  3. # about size growth of tables, saves it to stat files,
  4. # calculate stat and sends a mail notification that contents of:
  5. # 1) top tables by size
  6. # 2) top tables by 1 day growth (with a difference)
  7. # 3) similar lists for 7 days and 30 days
  8. #
  9. # Author: Andrey Klychkov aaklychkov@mail.ru
  10. # Licence: Copyleft free software
  11. # Date: 29.03.2018
  12. #
  13. # Requirements: psql,
  14. # run it as the 'postgres' user
  15. #
  16. # Important: You must set up desired mail server
  17. # settings to send mail notifications
  18. #
  19. # Usage: ./pg_stat_growth_tables.py DBNAME STAT_DIR
  20. #
  21. # Notification's content example:
  22. # -------------------------------
  23. # ## Top 20 tables by size (in MB) ##
  24. # -----------------------------------
  25. # 121025 test_table1
  26. # 57025 test_table2
  27. # 53670 test_table3
  28. #
  29. # ## Top tables by growth 1 day (in MB) ##
  30. # ----------------------------------------------
  31. # 229 test_table4 3%
  32. # 211 test_table1 0%
  33. # 125 test_table1 0%
  34. # ...
  35. # Similar lists for 7 and 30 days stat
  36.  
  37. from __future__ import print_function
  38. import datetime
  39. import os
  40. import smtplib
  41. import socket
  42. import subprocess
  43. import sys
  44. from email.mime.multipart import MIMEMultipart
  45. from email.mime.text import MIMEText
  46. from pwd import getpwnam
  47.  
  48. ########################
  49. # PARAMETERS BLOCK #
  50. ########################
  51.  
  52. # Check a number of command-line arguments:
  53. if len(sys.argv) != 3:
  54. print('Syntax: ./pg_stat_growth_tables.py DBNAME STAT_DIR')
  55. sys.exit(1)
  56.  
  57. # Main params:
  58. # ------------
  59. DBNAME = sys.argv[1]
  60. STAT_DIR = sys.argv[2]+'/'
  61. # min table size for accounting, in bytes:
  62. MIN_TBL_SIZE = 1000000000
  63. # line number for notifications:
  64. NUM_LINES_REPORT = 20
  65.  
  66. # Common params:
  67. # --------------
  68. LOCAL_USER = 'postgres'
  69. HOSTNAME = socket.gethostname()
  70. NOW = datetime.datetime.now()
  71. DATE = NOW.strftime('%Y%m%d')
  72. TIME = NOW.strftime('%Y.%m.%d %H:%M')
  73.  
  74. # Mail params:
  75. # ------------
  76. SEND_MAIL = 1
  77. SENDER = 'report.yourcompany@gmail.com'
  78. RECIPIENT = ['your.mailbox@example.com']
  79. SMTP_SRV = 'smtp.gmail.com'
  80. SMTP_PORT = 587
  81. SMTP_PASS = 'PasswdHere'
  82. # Mail subject template:
  83. mail_sbj = '%s: database %s statistic' % (HOSTNAME, DBNAME)
  84.  
  85.  
  86. def send_mail(sbj, ms):
  87. if SEND_MAIL:
  88. msg = MIMEMultipart()
  89. msg['Subject'] = (sbj)
  90. msg['From'] = 'root@%s' % HOSTNAME
  91. msg['To'] = RECIPIENT[0]
  92. body = MIMEText(ms, 'plain')
  93. msg.attach(body)
  94. smtpconnect = smtplib.SMTP(SMTP_SRV, SMTP_PORT)
  95. smtpconnect.starttls()
  96. smtpconnect.login(SENDER, SMTP_PASS)
  97. smtpconnect.sendmail(SENDER, RECIPIENT, msg.as_string())
  98. smtpconnect.quit()
  99. else:
  100. pass
  101.  
  102.  
  103. def create_dir(dr):
  104. try:
  105. os.mkdir(dr)
  106. os.chown(dr, getpwnam(LOCAL_USER).pw_uid, getpwnam(LOCAL_USER).pw_gid)
  107. os.chmod(dr, 0700)
  108. except Exception as e:
  109. print(e)
  110. sys.exit(1)
  111.  
  112.  
  113. def check_dir(dr):
  114. if not os.path.isdir(dr):
  115. create_dir(dr)
  116.  
  117.  
  118. def do_shell_cmd(cmd):
  119. ret = subprocess.Popen(cmd, shell=True,
  120. stdout=subprocess.PIPE,
  121. stderr=subprocess.STDOUT)
  122. out = ret.stdout.readlines()
  123. ret.communicate()
  124. return out
  125.  
  126.  
  127. def do_psql_cmd(dbname, cmd):
  128. psql_cmd = 'psql %s -t -A -c "%s"' % (dbname, cmd)
  129. result = do_shell_cmd(psql_cmd)
  130. return result
  131.  
  132.  
  133. def db_write(db_file, entry):
  134. '''
  135. Write size stat from own stat files
  136. '''
  137. db = open(db_file, 'a')
  138. try:
  139. db.write(entry)
  140. finally:
  141. db.close()
  142.  
  143.  
  144. def db_read(db_file):
  145. '''
  146. Read size stat from own stat files
  147. '''
  148. if not os.path.isfile(db_file):
  149. with open(db_file, 'w') as f:
  150. pass
  151.  
  152. db = open(db_file, 'r')
  153. try:
  154. stat = db.readlines()
  155. return stat
  156. finally:
  157. db.close()
  158.  
  159.  
  160. def growth_per_days(days):
  161. growth_b = 0
  162. days_num = len(t_db)
  163. tm = days - 1
  164. if days_num < tm:
  165. t_size_futher_day = int(t_db[0].split(',')[1])
  166. else:
  167. t_size_futher_day = int(t_db[-tm].split(',')[1])
  168.  
  169. growth_b = t_size - t_size_futher_day
  170.  
  171. if t_size_futher_day != 0:
  172. growth_p = int(growth_b / float(t_size_futher_day) * 100)
  173. else:
  174. growth_p = 0
  175.  
  176. return (growth_b, growth_p)
  177.  
  178.  
  179. def get_top_by_size(t_list, num):
  180. size_info = ['## Top %s tables by size (in MB) ##' % num,
  181. '-----------------------------------']
  182. for t in t_list[0:num]:
  183. info = t.split('|')
  184. name = info[0]
  185. size = int(float(info[1]) / 1024 / 1024)
  186. size_info.append('%s %s' % (size, name))
  187.  
  188. top_size = '\n'.join(size_info)
  189. return top_size
  190.  
  191.  
  192. if __name__ == '__main__':
  193.  
  194. # Initialize a notification's body:
  195. report = ['%s\n\n' % TIME] # Report initializing
  196.  
  197. # Check script data directory,
  198. # create it if doesn't exist:
  199. check_dir(STAT_DIR)
  200.  
  201. # Get a table list with size in byte:
  202. get_tbl_list_with_size = "SELECT c.relname,\
  203. pg_catalog.pg_total_relation_size(c.oid)\
  204. FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_namespace n\
  205. ON n.oid = c.relnamespace WHERE c.relkind IN ('r','')\
  206. AND n.nspname <> 'pg_catalog' AND n.nspname <> 'information_schema'\
  207. AND n.nspname !~ '^pg_toast' AND pg_catalog.pg_table_is_visible(c.oid)\
  208. ORDER BY pg_catalog.pg_total_relation_size(c.oid) DESC;"
  209.  
  210. tbl_list_with_size = do_psql_cmd(DBNAME, get_tbl_list_with_size)
  211.  
  212. # ####################################
  213. # For you information: rows in a table
  214. # stat file in the STAT_DIR contents:
  215. # list = [0 date,
  216. # 1 cursize,
  217. # 2 growth_b_per_1_day,
  218. # 3 growth_%_per_1_day,
  219. # 4 growth_b_per_7_days,
  220. # 5 growth_%_per_7_days,
  221. # 6 growth_b_per_30_days,
  222. # 7 growth_%_per_30_days,
  223. # 8 growth_avrg_per_7_days,
  224. # 9 growth_avrg_per_30_days]
  225. ######################################
  226.  
  227. for t in tbl_list_with_size:
  228. t_spl = t.split('|')
  229. t_name = t_spl[0]
  230. t_size = int(t_spl[1].rstrip('\n'))
  231. t_db_file = STAT_DIR+t_name+'.db'
  232.  
  233. if int(t_size) >= MIN_TBL_SIZE:
  234. t_db = db_read(t_db_file)
  235.  
  236. # If t_db_file is empty:
  237. if len(t_db) == 0:
  238. t_stat_entry = '%s,%s,0,0,0,0,0,0,0,0\n' % (DATE, t_size)
  239. db_write(t_db_file, t_stat_entry)
  240. else:
  241. prev_t_size = int(t_db[-1].split(',')[1])
  242.  
  243. # 2 growth_b_per_1_day:
  244. growth_1_day_b = t_size - prev_t_size
  245.  
  246. # 3 growth_%_per_1_day:
  247. if prev_t_size != 0:
  248. growth_1_day_p = int(
  249. growth_1_day_b / float(prev_t_size) * 100)
  250. else:
  251. growth_1_day_p = 0
  252.  
  253. # 4, 5 growth_per_7_days:
  254. growth_7_days = growth_per_days(7)
  255. growth_7_days_b = growth_7_days[0]
  256. growth_7_days_p = growth_7_days[1]
  257.  
  258. # 6, 7 growth_per_30_days:
  259. growth_30_days = growth_per_days(30)
  260. growth_30_days_b = growth_30_days[0]
  261. growth_30_days_p = growth_30_days[1]
  262.  
  263. # 8 growth_avrg_per_7_days:
  264. growth_7_days_avg = growth_7_days_b / 7
  265. # 9 growth_avrg_per_30_days:
  266. growth_30_days_avg = growth_30_days_b / 30
  267.  
  268. entry_cur_day_stat = '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (
  269. DATE,
  270. t_size,
  271. growth_1_day_b,
  272. growth_1_day_p,
  273. growth_7_days_b,
  274. growth_7_days_p,
  275. growth_30_days_b,
  276. growth_30_days_p,
  277. growth_7_days_avg,
  278. growth_30_days_avg)
  279.  
  280. db_write(t_db_file, entry_cur_day_stat)
  281.  
  282. else:
  283. tbl_list_with_size.remove(t)
  284.  
  285. # Generation parts of a notification:
  286. # 0) Top tables by size:
  287. top_by_current_size = get_top_by_size(tbl_list_with_size, NUM_LINES_REPORT)
  288.  
  289. # 1) Top tables by day's growth:
  290. top_by_1_day_growth_list = []
  291. top_by_7_day_growth_list = []
  292. top_by_30_day_growth_list = []
  293. for t in tbl_list_with_size:
  294. t_spl = t.split('|')
  295. t_name = t_spl[0]
  296. t_size = int(t_spl[1].rstrip('\n'))
  297. t_db_file = STAT_DIR+t_name+'.db'
  298.  
  299. if int(t_size) >= MIN_TBL_SIZE:
  300. t_db = db_read(t_db_file)
  301.  
  302. growth_1_day_mb = int(float(t_db[-1].split(',')[2]) / 1024 / 1024)
  303. growth_7_day_mb = int(float(t_db[-1].split(',')[4]) / 1024 / 1024)
  304. growth_30_day_mb = int(float(t_db[-1].split(',')[6]) / 1024 / 1024)
  305. growth_1_day_p = t_db[-1].split(',')[3]
  306. growth_7_day_p = t_db[-1].split(',')[5]
  307. growth_30_day_p = t_db[-1].split(',')[7]
  308.  
  309. if growth_1_day_mb > 0:
  310. entry_growth1 = "%s %s %s%%\n" % (
  311. growth_1_day_mb, t_name, growth_1_day_p)
  312. top_by_1_day_growth_list.append(entry_growth1)
  313.  
  314. if growth_7_day_mb > 0:
  315. entry_growth7 = "%s %s %s%%\n" % (
  316. growth_7_day_mb, t_name, growth_7_day_p)
  317. top_by_7_day_growth_list.append(entry_growth7)
  318.  
  319. if growth_30_day_mb > 0:
  320. entry_growth30 = "%s %s %s%%\n" % (
  321. growth_30_day_mb, t_name, growth_7_day_p)
  322. top_by_30_day_growth_list.append(entry_growth30)
  323.  
  324. # Make a notification:
  325. top_by_1_day_growth_sorted = sorted(
  326. top_by_1_day_growth_list, key=lambda x: int(
  327. x[:x.index(' ')]), reverse=True)
  328. top_by_7_day_growth_sorted = sorted(
  329. top_by_7_day_growth_list, key=lambda x: int(
  330. x[:x.index(' ')]), reverse=True)
  331. top_by_30_day_growth_sorted = sorted(
  332. top_by_30_day_growth_list, key=lambda x: int(
  333. x[:x.index(' ')]), reverse=True)
  334.  
  335. report_1 = ''.join(top_by_1_day_growth_sorted[:NUM_LINES_REPORT-1])
  336. report_7 = ''.join(top_by_7_day_growth_sorted[:NUM_LINES_REPORT-1])
  337. report_30 = ''.join(top_by_30_day_growth_sorted[:NUM_LINES_REPORT-1])
  338.  
  339. delim = (46 * '-')+'\n'
  340. completed_report = top_by_current_size+'\n'
  341. completed_report += '\n## Top tables by growth 1 day (in MB) '
  342. completed_report += '##\n%s%s\n' % (delim, report_1)
  343. completed_report += '## Top tables by growth 7 days (in MB) '
  344. completed_report += '##\n%s%s\n' % (delim, report_7)
  345. completed_report += '## Top tables by growth 30 days (in MB) '
  346. completed_report += '##\n%s%s' % (delim, report_30)
  347.  
  348. # Send a notification:
  349. mail_sbj = '%s: table growth statistic information' % HOSTNAME
  350. send_mail(mail_sbj, completed_report)
  351.  
  352. sys.exit(0)
Add Comment
Please, Sign In to add comment