Advertisement
planzelle

Kill long running sql queries on postges server

Aug 31st, 2015
162
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.63 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3.  
  4. """
  5.     Utility to check for idle running queries over a long time
  6.  
  7.     Can kill the pid and send an email to error-account with
  8.     some infos concerning the killed pids/queries
  9.     Place it on db server. Best used with crontab.
  10.  
  11.     Params:
  12.     -s = Simulate the kill (outputs the queries and some info without touching the pid)
  13.     -k = Kill the long runners pids, send a mail
  14.  
  15.     If both params are set (-s and -k) the -s param is dominant (which means no killing)
  16.     for security reasons
  17.  
  18. """
  19.  
  20. import os
  21. import signal
  22. import psycopg2
  23. from email.mime.text import MIMEText
  24. import smtplib
  25. import sys
  26. import socket
  27. import time
  28. import syslog
  29.  
  30.  
  31. maxMinutes = 240 # 4h
  32. reportMail = 'error@myserver.com'
  33. host = socket.getfqdn()
  34. sender = 'longrunner@'+ host
  35. smtpserver = 'smtp.myserver.com'
  36. usetls = True
  37. smtpusername = ''
  38. smtppassword = ''
  39.  
  40.  
  41. # no config beyond this point needed
  42. debug = 0
  43. kill = 0
  44.  
  45.  
  46. def usage():
  47.     print("\n\nUsage:\n python %s [-s|-k]\n" % (sys.argv[0]))
  48.     print("Kill the long running sql queries (over %s minutes) and \nsend a mail to %s\n" % (maxMinutes, reportMail))
  49.     print("Params:")
  50.     print("-s = simulate (no kill)")
  51.     print("-k = kill long running query pids\n")
  52.     sys.exit(0)
  53.  
  54.  
  55. def terminate(msg):
  56.     msg = "%s [%s]: ERROR - %s" % (time.ctime(), sys.argv[0], msg)
  57.     syslog.syslog(syslog.LOG_ERROR, msg)
  58.     sys.exit(1)
  59.  
  60.  
  61. def sendmail(recipient, subject, content):
  62.     msg = MIMEText(content)
  63.     msg['From'] = sender
  64.     msg['To'] = recipient
  65.     msg['Subject'] = subject
  66.     server = smtplib.SMTP(smtpserver)
  67.     if usetls:
  68.         server.starttls()
  69.     if smtpusername and smtppassword:
  70.         server.login(smtpusername, smtppassword)
  71.     server.sendmail(sender, recipient, msg.as_string())
  72.     server.quit()
  73.  
  74.  
  75. def main():
  76.     try:
  77.         conn = psycopg2.connect("dbname='postgres' host='127.0.0.1' user='postgres' password=''")
  78.     except:
  79.         terminate("Could not connect to local database [127.0.0.1]")
  80.  
  81.     cur = conn.cursor()
  82.     sql = """SELECT
  83.         /* 00 */ pid,
  84.         /* 01 */ datname,
  85.         /* 02 */ client_addr,
  86.         /* 03 */ substring(query,0,160),
  87.         /* 04 */ state,
  88.         /* 05 */ waiting,
  89.         /* 06 */ (now()-query_start) AS elapsed,
  90.         /* 07 */ (extract(EPOCH FROM (now()-query_start))/60)::INTEGER AS minutes
  91.         FROM pg_stat_activity
  92.         WHERE query!='<IDLE>' AND query!='DISCARD ALL'
  93.         ORDER BY elapsed ASC;"""
  94.  
  95.     try:
  96.         cur.execute(sql)
  97.     except:
  98.         terminate("Querying db failed")
  99.  
  100.     try:
  101.         rows = cur.fetchall()
  102.     except:
  103.         terminate("Could not fetch resultset")
  104.  
  105.     counter = 0
  106.     mailbody = ""
  107.     cmdLine = "Manual command on host %s:\nsudo kill " % (host)
  108.  
  109.     for row in rows:
  110.         if row[7] <= maxMinutes:
  111.             continue
  112.  
  113.         # build line with infos for query
  114.         line = """pid=%s minutes=%s state=%s db=%s ip=%s\n\tquery=%s\n\n""" % (row[0], row[7], row[4], row[1], row[2], row[3])
  115.         mailbody += line
  116.         cmdLine += " %s" % (row[0])
  117.         if debug==1:
  118.             print(line)
  119.  
  120.         # only if kill flag is set kill the pid
  121.         if kill==1:
  122.             os.kill(row[0], signal.SIGTERM) #if not working try: SIGKILL
  123.  
  124.         counter += 1
  125.  
  126.     # only if we have killed long runners we send a mail
  127.     if counter > 0:
  128.         # in debug mode output the cmdline to kill the pids manually
  129.         if debug==1:
  130.             print("\n%s\n" % (cmdLine))
  131.  
  132.         if kill==1:
  133.             mailheader = """The following %s pids have been killed [limit= %s minutes]:\n\n""" % (counter, maxMinutes)
  134.             mailbody = mailheader + mailbody
  135.  
  136.             sendmail(reportMail, 'LongRunner: '+ host, mailbody)
  137.  
  138.     sys.exit(0)
  139.  
  140.  
  141.  
  142. if __name__ == '__main__':
  143.     if (len(sys.argv)==1):
  144.         usage()
  145.     else:
  146.         for opt in sys.argv[1:]:
  147.             if opt == '-k':
  148.                 kill=1
  149.                 debug=0
  150.  
  151.             if opt == '-s':
  152.                 debug=1
  153.                 kill=0
  154.  
  155.         main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement