Guest User

Untitled

a guest
Mar 10th, 2019
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.80 KB | None | 0 0
  1. # Create the datadog user with select only permissions:
  2. # CREATE USER datadog WITH PASSWORD '<complex_password>';
  3. #
  4. # Grant select permissions on a table or view that you want to monitor:
  5. # GRANT SELECT ON <schema>.<table> TO datadog;
  6. #
  7. # Grant permissions for a specific column on a table or view that you want to monitor:
  8. # GRANT SELECT (id, name) ON <schema>.<table> TO datadog;
  9. #
  10. # Let non-superusers look at pg_stat_activity in a read-only fashon.
  11. # Create this function as a superuser for the database:
  12. #
  13. # CREATE OR REPLACE FUNCTION public.pg_stat_activity() RETURNS SETOF pg_catalog.pg_stat_activity AS $BODY$
  14. # DECLARE
  15. # rec RECORD;
  16. # BEGIN
  17. # FOR rec IN SELECT * FROM pg_stat_activity
  18. # LOOP
  19. # RETURN NEXT rec;
  20. # END LOOP;
  21. # RETURN;
  22. # END;
  23. # $BODY$ LANGUAGE plpgsql SECURITY DEFINER;
  24. #
  25. # Enable stats collection for functions:
  26. # set track_functions to 'pl';
  27.  
  28. from checks import AgentCheck
  29.  
  30. import psycopg2 as pg
  31.  
  32. class PostgresqlCheck(AgentCheck):
  33. def __init__(self, name, init_config, agentConfig):
  34. AgentCheck.__init__(self, name, init_config, agentConfig)
  35.  
  36. self.name = init_config.get('name', 'postgresql')
  37.  
  38. def check(self, instance):
  39. self.log.info('Starting PostgreSQL Check')
  40.  
  41. tags = instance.get('tags', [])
  42. host = instance.get('host', 'localhost')
  43. port = instance.get('port', '5432')
  44. username = instance.get('username')
  45. password = instance.get('password')
  46. database = instance.get('database')
  47.  
  48. tags = tags + ['database:%s' % (database)]
  49.  
  50. self.log.info('Connecting to PostgreSQL')
  51.  
  52. db = pg.connect(host=host, port=port, user=username, password=password, database=database)
  53. cu = db.cursor()
  54.  
  55. # Start Collecting Table Stats
  56. self.log.info('Collecting Table Stats')
  57.  
  58. cu.execute(self.query_table_stats())
  59.  
  60. for stat in cu.fetchall():
  61. (
  62. schemaname,
  63. relname,
  64. relsize,
  65. total_relsize,
  66. reltuples,
  67. relpages,
  68. avg_tuplesize,
  69. seq_scan,
  70. idx_scan,
  71. per_idx_scan,
  72. per_rel_hit,
  73. per_idx_hit,
  74. n_tup_ins,
  75. n_tup_upd,
  76. n_tup_hot_upd,
  77. per_hot_upd,
  78. n_tup_del,
  79. n_live_tup,
  80. n_dead_tup,
  81. per_deadfill
  82. ) = stat
  83.  
  84. self.gauge('%s.table.relsize' % (self.name), relsize, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  85. self.gauge('%s.table.total_relsize' % (self.name), total_relsize, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  86. self.gauge('%s.table.reltuples' % (self.name), reltuples, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  87. self.gauge('%s.table.relpages' % (self.name), relpages, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  88. self.gauge('%s.table.avg_tuplesize' % (self.name), avg_tuplesize, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  89. self.rate( '%s.table.seq_scan' % (self.name), seq_scan, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  90. self.rate( '%s.table.idx_scan' % (self.name), idx_scan, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  91. self.gauge('%s.table.per_idx_scan' % (self.name), per_idx_scan, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  92. self.gauge('%s.table.per_rel_hit' % (self.name), per_rel_hit, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  93. self.gauge('%s.table.per_idx_hit' % (self.name), per_idx_hit, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  94. self.rate( '%s.table.n_tup_ins' % (self.name), n_tup_ins, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  95. self.rate( '%s.table.n_tup_upd' % (self.name), n_tup_upd, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  96. self.rate( '%s.table.n_tup_hot_upd' % (self.name), n_tup_hot_upd, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  97. self.gauge('%s.table.per_hot_upd' % (self.name), per_hot_upd, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  98. self.rate( '%s.table.n_tup_del' % (self.name), n_tup_del, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  99. self.gauge('%s.table.n_live_tup' % (self.name), n_live_tup, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  100. self.gauge('%s.table.n_dead_tup' % (self.name), n_dead_tup, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  101. self.gauge('%s.table.per_deadfill' % (self.name), per_deadfill, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname)]))
  102.  
  103.  
  104. # Start Collecting Index Stats
  105. self.log.info('Collecting Index Stats')
  106.  
  107. cu.execute(self.query_index_stats())
  108.  
  109. for stat in cu.fetchall():
  110. (
  111. schemaname,
  112. relname,
  113. idxname,
  114. idx_scan,
  115. idx_tup_read,
  116. idx_tup_fetch
  117. ) = stat
  118.  
  119. self.rate('%s.index.idx_scan' % (self.name), idx_scan, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname), 'index:%s' % (idxname)]))
  120. self.rate('%s.index.idx_tup_read' % (self.name), idx_tup_read, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname), 'index:%s' % (idxname)]))
  121. self.rate('%s.index.idx_tup_fetch' % (self.name), idx_tup_fetch, tags = (tags + ['schema:%s' % (schemaname), 'table:%s' % (relname), 'index:%s' % (idxname)]))
  122.  
  123. # Start Collecting Function Stats
  124. self.log.info('Collecting Function Stats')
  125.  
  126. cu.execute(self.query_function_stats())
  127.  
  128. for stat in cu.fetchall():
  129. (
  130. schemaname,
  131. funcname,
  132. calls,
  133. self_time,
  134. total_time
  135. ) = stat
  136.  
  137. self.rate('%s.function.calls' % (self.name) , calls, tags = (tags + ['schema:%s' % (schemaname), 'function:%s' % (funcname)]))
  138. self.rate('%s.function.self_time' % (self.name) , self_time, tags = (tags + ['schema:%s' % (schemaname), 'function:%s' % (funcname)]))
  139. self.rate('%s.function.total_time' % (self.name), total_time, tags = (tags + ['schema:%s' % (schemaname), 'function:%s' % (funcname)]))
  140.  
  141. # Start Collecting Database Stats
  142. self.log.info('Collecting Database Stats')
  143.  
  144. cu.execute(self.query_database_stats(database))
  145.  
  146. for stat in cu.fetchall():
  147. (
  148. datname,
  149. xact_commit,
  150. xact_rollback,
  151. blks_read,
  152. blks_hit
  153. ) = stat
  154.  
  155. self.rate('%s.database.xact_commit' % (self.name), xact_commit, tags = tags)
  156. self.rate('%s.database.xact_rollback' % (self.name), xact_rollback, tags = tags)
  157. self.rate('%s.database.blks_read' % (self.name), blks_read, tags = tags)
  158. self.rate('%s.database.blks_hit' % (self.name), blks_hit, tags = tags)
  159.  
  160. # Start Collecting Database Connections
  161. self.log.info('Collecting Database Connections')
  162.  
  163. cu.execute(self.query_connections(database))
  164.  
  165. for stat in cu.fetchall():
  166. (connections) = stat
  167.  
  168. self.gauge('%s.database.connections' % (self.name), connections, tags = tags)
  169.  
  170. # Start Collecting Database Waiting Connections
  171. self.log.info('Collecting Database Waiting Connections')
  172.  
  173. cu.execute(self.query_waiting_connections(database))
  174.  
  175. for stat in cu.fetchall():
  176. (waiting_connections) = stat
  177.  
  178. self.gauge('%s.database.waiting_connections' % (self.name), waiting_connections, tags = tags)
  179.  
  180. # Start Collecting Database Replication Delay
  181. self.log.info('Collecting Database Replication Delay')
  182.  
  183. cu.execute(self.query_replication_delay())
  184.  
  185. for stat in cu.fetchall():
  186. (replication_delay) = stat
  187.  
  188. self.gauge('%s.database.replication_delay' % (self.name), replication_delay, tags = tags)
  189.  
  190. # Start Collecting Heap Memory Stats
  191. self.log.info('Collecting Heap Memory Stats')
  192.  
  193. cu.execute(self.query_heap_memory_stats())
  194.  
  195. for stat in cu.fetchall():
  196. (
  197. heap_read,
  198. heap_hit,
  199. per_heap_ratio
  200. ) = stat
  201.  
  202. self.rate( '%s.database.heap_read' % (self.name), heap_read, tags = tags)
  203. self.rate( '%s.database.heap_hit' % (self.name), heap_hit, tags = tags)
  204. self.gauge('%s.database.per_heap_ratio' % (self.name), per_heap_ratio, tags = tags)
  205.  
  206. # Start Collecting Index Memory Stats
  207. self.log.info('Collecting Index Memory Stats')
  208.  
  209. cu.execute(self.query_index_memory_stats())
  210.  
  211. for stat in cu.fetchall():
  212. (
  213. idx_read,
  214. idx_hit,
  215. per_idx_ratio
  216. ) = stat
  217.  
  218. self.rate( '%s.database.idx_read' % (self.name), idx_read, tags = tags)
  219. self.rate( '%s.database.idx_hit' % (self.name), idx_hit, tags = tags)
  220. self.gauge('%s.database.per_idx_ratio' % (self.name), per_idx_ratio, tags = tags)
  221.  
  222. # Closing PostgreSQL Connection
  223. self.log.info('Closing PostgreSQL Connection')
  224. cu.close()
  225. db.close()
  226.  
  227. def query_table_stats(self):
  228. return """
  229. select
  230. psut.schemaname,
  231. pc.relname,
  232. pg_table_size(pc.relname::varchar) tblsize,
  233. pg_indexes_size(pc.relname::varchar) idxsize,
  234. pg_total_relation_size(pc.relname::varchar) relsize,
  235. pc.reltuples::bigint,
  236. pc.relpages,
  237. coalesce(round((8192 / (nullif(pc.reltuples, 0) / nullif(pc.relpages, 0)))), 0) avg_tuplesize,
  238. psut.seq_scan,
  239. psut.idx_scan,
  240. coalesce(100 * psut.idx_scan / nullif((psut.idx_scan + psut.seq_scan), 0), 0)::int per_idx_scan,
  241. coalesce(100 * psiout.heap_blks_hit / nullif((psiout.heap_blks_hit + psiout.heap_blks_read), 0), 0)::int per_rel_hit,
  242. coalesce(100 * psiout.idx_blks_hit / nullif((psiout.idx_blks_hit + psiout.idx_blks_read), 0), 0)::int per_idx_hit,
  243. psut.n_tup_ins,
  244. psut.n_tup_upd,
  245. psut.n_tup_hot_upd,
  246. coalesce(100 * psut.n_tup_hot_upd / nullif(psut.n_tup_upd, 0), 0)::int per_hot_upd,
  247. psut.n_tup_del,
  248. psut.n_live_tup,
  249. psut.n_dead_tup,
  250. coalesce(100 * psut.n_dead_tup / nullif(psut.n_live_tup, 0), 0)::int per_deadfill
  251. from pg_stat_user_tables psut
  252. inner join pg_statio_user_tables psiout on psiout.relname = psut.relname
  253. inner join pg_class pc on pc.relname = psut.relname
  254. order by pc.relname asc
  255. """
  256.  
  257. def query_index_stats(self):
  258. return """
  259. select
  260. pi.schemaname,
  261. pcr.relname as relname,
  262. pci.relname as idxname,
  263. pg_size_pretty(pg_total_relation_size(pci.relname::varchar)) idxsize_pret,
  264. pg_total_relation_size(pci.relname::varchar) idxsize,
  265. pci.reltuples::bigint idxtuples,
  266. pcr.reltuples::bigint reltuples,
  267. coalesce(100 * pci.reltuples / nullif(pcr.reltuples, 0), 0)::int per_idx_covered,
  268. pi.idx_scan,
  269. pi.idx_tup_read,
  270. pi.idx_tup_fetch
  271. from pg_stat_user_indexes pi
  272. inner join pg_class pci on pci.oid = pi.indexrelid
  273. inner join pg_class pcr on pcr.oid = pi.relid
  274. order by schemaname, relname, idxname
  275. """
  276.  
  277. def query_function_stats(self):
  278. return """
  279. select
  280. schemaname,
  281. funcname,
  282. calls,
  283. self_time,
  284. total_time
  285. from pg_stat_user_functions
  286. where schemaname <> 'pg_catalog'
  287. """
  288.  
  289. def query_database_stats(self, database):
  290. return """
  291. select
  292. datname,
  293. pg_database_size('%s') db_size,
  294. xact_commit,
  295. xact_rollback,
  296. blks_read,
  297. blks_hit
  298. from pg_stat_database where datname = '%s'
  299. """ % (database, database)
  300.  
  301. def query_connections(self, database):
  302. return """
  303. select count(1) connections from pg_stat_activity() where datname = '%s'
  304. """ % (database)
  305.  
  306. def query_waiting_connections(self, database):
  307. return """
  308. select count(1) waiting_connections from pg_stat_activity() where waiting is true and datname = '%s'
  309. """ % (database)
  310.  
  311. def query_replication_delay(self):
  312. return """
  313. select extract(epoch from (now() - pg_last_xact_replay_timestamp())) * 1000 as replication_delay
  314. """
  315.  
  316. def query_heap_memory_stats(self):
  317. return """
  318. select
  319. cast(sum(heap_blks_read) as bigint) heap_read,
  320. cast(sum(heap_blks_hit) as bigint) heap_hit,
  321. coalesce(cast(sum(heap_blks_hit) / nullif((sum(heap_blks_hit) + sum(heap_blks_read)), 0) * 100 as bigint), 0)::int per_heap_ratio
  322. from pg_statio_user_tables
  323. """
  324.  
  325. def query_index_memory_stats(self):
  326. return """
  327. select
  328. cast(sum(idx_blks_read) as bigint) idx_read,
  329. cast(sum(idx_blks_hit) as bigint) idx_hit,
  330. coalesce(cast(sum(idx_blks_hit) / nullif((sum(idx_blks_hit) + sum(idx_blks_read)), 0) * 100 as bigint), 0)::int per_idx_ratio
  331. from pg_statio_user_indexes
  332. """
Add Comment
Please, Sign In to add comment