Guest User

Untitled

a guest
May 31st, 2016
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.20 KB | None | 0 0
  1. #! /usr/bin/env ruby
  2.  
  3. # -----------------------
  4. # Author: Andreas Paul (xorpaul) <xorpaul@gmail.com>
  5. # Date: 2014-02-05 17:06
  6. # Version: 0.1
  7. # -----------------------
  8. #
  9. # http://docs.puppetlabs.com/puppetdb/latest/api/query/v3/metrics.html
  10.  
  11. require 'rubygems'
  12. require 'optparse'
  13. require 'open-uri'
  14. require 'uri'
  15. require 'json'
  16. require 'socket'
  17. require 'timeout'
  18. require 'open-uri'
  19. require 'net/http'
  20.  
  # Runtime settings. They stay global because the helper methods defined
  # further down in this file read $debug and $timeout directly.
  $debug = false
  $checkmk = false
  $host = ''
  $timeout = 5
  $port = 8080
  $sslport = $port + 1
  $queuewarn = 500
  $queuecrit = 2000
  # The command rate is compared with `<=` against these two values, so -1
  # presumably means "threshold disabled" -- NOTE(review): confirm.
  $cmd_p_secwarn = -1
  $cmd_p_seccrit = -1

  # Every option below declares its argument as optional ("[NAME]"), which
  # means OptionParser hands nil to the block when the value is left out.
  # The numeric options therefore keep their default instead of storing nil.
  opt = OptionParser.new
  opt.on("--debug", "-d", "print debug information, defaults to #{$debug}") do
    $debug = true
  end
  opt.on("--checkmk", "append HTML </br> to each line in the long output to display line breaks in the check_mk GUI, defaults to #{$checkmk}") do
    $checkmk = true
  end
  opt.on("--host [PUPPETDBSERVER]", "-H", "Your PuppetDB hostname, MANDATORY parameter") do |host_p|
    $host = host_p
  end
  opt.on("--port [PORT]", "-p", Integer, "Your PuppetDB port, defaults to #{$port}") do |port_p|
    $port = port_p unless port_p.nil?
  end
  opt.on("--sslport [SSLPORT]", "-s", Integer, "Your PuppetDB SSL port, defaults to #{$port + 1}") do |sslport_p|
    $sslport = sslport_p unless sslport_p.nil?
  end
  opt.on("--timeout [SECONDS]", "-t", Integer, "Timeout for each HTTP GET request, defaults to #{$timeout} seconds") do |timeout_p|
    $timeout = timeout_p unless timeout_p.nil?
  end
  opt.on("--queuewarn [WARNTHRESHOLD]", Integer, "WARNING threshold for PuppetDB queue size, defaults to #{$queuewarn}") do |qw_p|
    $queuewarn = qw_p unless qw_p.nil?
  end
  opt.on("--queuecrit [CRITTHRESHOLD]", Integer, "CRITICAL threshold for PuppetDB queue size, defaults to #{$queuecrit}") do |qc_p|
    $queuecrit = qc_p unless qc_p.nil?
  end
  opt.on("--cmd_p_secwarn [WARNTHRESHOLD]", Float, "WARNING threshold for Commands processed per second, defaults to #{$cmd_p_secwarn} cmds/s") do |cw_p|
    $cmd_p_secwarn = cw_p unless cw_p.nil?
  end
  opt.on("--cmd_p_seccrit [CRITTHRESHOLD]", Float, "CRITICAL threshold for Commands processed per second, defaults to #{$cmd_p_seccrit} cmds/s") do |cc_p|
    $cmd_p_seccrit = cc_p unless cc_p.nil?
  end

  # Unknown options or values of the wrong type end the run with exit
  # status 3 and the usage text instead of an uncaught exception.
  begin
    opt.parse!
  rescue OptionParser::ParseError => e
    puts "ERROR: #{e.message}"
    puts opt
    exit 3
  end

  # The host is the only mandatory setting; "-H" without a value leaves nil.
  if $host.nil? || $host.empty?
    puts 'ERROR: Please specify your PuppetDB server with -H <PUPPETDBSERVER>'
    puts "Example: #{__FILE__} -H puppetdb.domain.tld"
    puts opt
    exit 3
  end
  71.  
  # http://stackoverflow.com/a/517638/682847
  #
  # Reports whether a TCP connection to ip:port can be opened.
  #
  # ip   - host name or address handed to TCPSocket.new
  # port - port number (or service name) handed to TCPSocket.new
  #
  # The attempt is wrapped in Timeout.timeout($timeout). Returns true when the
  # connection was established (the socket is closed again right away) and
  # false for every failure: timeout, name/service resolution errors
  # (SocketError) and any operating-system level error (SystemCallError, the
  # parent of the Errno::* classes), so an unreachable host never aborts the
  # caller with an exception.
  def is_port_open?(ip, port)
    Timeout.timeout($timeout) do
      TCPSocket.new(ip, port).close
      true
    end
  rescue Timeout::Error, SocketError, SystemCallError
    false
  end
  89.  
  # http://grosser.it/2008/10/25/numbers-for-humans-humanize-for-numeric/
  class Numeric
    # Renders the number as a string with grouped thousands.
    #
    # rounding  - number of digits passed to #round before formatting
    # delimiter - string inserted between groups of three integer digits
    # separator - string placed between integer and fractional part
    #
    # Returns a String, e.g. 1234567.humanize => "1,234,567" and
    # 1234.5678.humanize => "1,234.57".
    def humanize(rounding=2,delimiter=',',separator='.')
      # Round with the built-in #round; the previous check for the
      # ActiveSupport-only #round_with_precision meant no rounding ever
      # happened in plain Ruby.
      value = respond_to?(:round) ? round(rounding) : self
      whole, fraction = value.to_s.split('.', 2)
      # insert the delimiter before every complete group of three digits
      whole = whole.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
      [whole, fraction].compact.join(separator)
    end
  end
  100.  
  # Sends an HTTP GET to url and parses the response body as JSON.
  #
  # url - String with the complete URL to fetch
  #
  # Returns a Hash:
  #   'returncode' => 0 and 'data' => parsed JSON on success
  #   'returncode' => 1 and 'text' => "WARNING: Error ..." on failure
  #
  # Nothing is raised for transport, HTTP status or parse problems: besides
  # timeouts and protocol errors this also covers HTTP error statuses
  # (OpenURI::HTTPError), bodies that are not valid JSON, name resolution
  # failures, malformed URLs and any Errno::* error. Reads $timeout (read
  # timeout in seconds) and $debug (verbose output).
  def doRequest(url)
    out = {'returncode' => 0}
    puts "sending GET to #{url}" if $debug
    begin
      response = URI.parse(url).read(:read_timeout => $timeout)
      puts "Response: #{response}" if $debug
      # JSON.parse only builds plain data structures from the remote payload
      out['data'] = JSON.parse(response)
    rescue Timeout::Error, SystemCallError, SocketError, IOError,
           OpenURI::HTTPError, JSON::ParserError, URI::InvalidURIError,
           Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError => e
      out['text'] = "WARNING: Error '#{e}' while sending request to #{url}"
      out['returncode'] = 1
    end
    puts "Parsed: #{out['data']}" if $debug
    out
  end
  117.  
  # Queries the "processing-time" command MBean and grades its five minute
  # rate against the given thresholds.
  #
  # host, port - PuppetDB HTTP endpoint
  # warn, crit - a five minute rate at or below crit gives CRITICAL (2), at or
  #              below warn gives WARNING (1), anything else OK (0)
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def commandProcessingMetrics(host, port, warn, crit)
    reply = doRequest("http://#{host}:#{port}/v3/metrics/mbean/com.puppetlabs.puppetdb.command:type=global,name=processing-time")
    unless reply['returncode'] == 0
      return {'perfdata' => '', 'text' => reply['text'], 'returncode' => reply['returncode']}
    end

    mbean = reply['data']
    rate_1m, rate_5m, rate_15m, median = %w[OneMinuteRate FiveMinuteRate FifteenMinuteRate 50thPercentile].map do |key|
      mbean[key].round(3)
    end

    severity, code =
      if rate_5m <= crit
        ['CRITICAL: ', 2]
      elsif rate_5m <= warn
        ['WARNING: ', 1]
      else
        ['', 0]
      end

    perf_lines = [
      "cmd_time_1m: #{rate_1m} ms",
      "cmd_time_5m: #{rate_5m} ms",
      "cmd_time_15m: #{rate_15m} ms",
      "cmd_percent_50: #{median} ms"
    ]
    {
      'perfdata' => "\n" + perf_lines.join("\n"),
      'text' => "#{severity}#{rate_5m} cmds/s \n#{median} ms/cmd",
      'returncode' => code
    }
  end
  146.  
  # Reads the BoneCP MBean and reports connection counts plus the average
  # statement execute/prepare times (rounded to three digits).
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def databaseMetrics(host, port)
    summary = {'perfdata' => '', 'returncode' => 0}
    reply = doRequest("http://#{host}:#{port}/v3/metrics/mbean/com.jolbox.bonecp:type=BoneCP")

    if reply['returncode'] != 0
      summary['text'] = reply['text']
      summary['returncode'] = reply['returncode']
      return summary
    end

    pool = reply['data']
    created = pool['TotalCreatedConnections']
    leased = pool['TotalLeased']
    exec_avg = pool['StatementExecuteTimeAvg'].round(3)
    prepare_avg = pool['StatementPrepareTimeAvg'].round(3)

    summary['text'] = "used DB connections: #{leased}"
    summary['perfdata'] = [
      "\nmax_connections: #{created} ",
      "\nused_connections: #{leased} ",
      "\ndb_exec_avg_time: #{exec_avg} ms ",
      "\ndb_prepare_avg_time: #{prepare_avg} ms"
    ].join
    summary
  end
  164.  
  # Reads the java.lang Memory MBean and reports used and maximum heap,
  # each divided by 1024 twice (bytes to MB, presumably -- the unit of the
  # raw values is not visible here).
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def JvmMetrics(host, port)
    reply = doRequest("http://#{host}:#{port}/v3/metrics/mbean/java.lang:type=Memory")
    report = {'perfdata' => '', 'returncode' => 0}

    unless reply['returncode'] == 0
      report['text'] = reply['text']
      report['returncode'] = reply['returncode']
      return report
    end

    heap = reply['data']['HeapMemoryUsage']
    used_mb = heap['used'] / 1024 / 1024
    max_mb = heap['max'] / 1024 / 1024

    report['text'] = "JVM #{used_mb}MB"
    report['perfdata'] = "\njvm_used: #{used_mb} MB \njvm_max: #{max_mb} MB"
    report
  end
  180.  
  # Reports the 'Count' attribute of the "processed" command MBean.
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def commandProcessedMetrics(host, port)
    reply = doRequest("http://#{host}:#{port}/v3/metrics/mbean/com.puppetlabs.puppetdb.command:type=global,name=processed")
    outcome = {'perfdata' => '', 'returncode' => 0}

    case reply['returncode']
    when 0
      count = reply['data']['Count']
      outcome['text'] = "processed: #{count}"
      outcome['perfdata'] = "\nprocessed: #{count}"
    else
      outcome['text'] = reply['text']
      outcome['returncode'] = reply['returncode']
    end

    outcome
  end
  195.  
  # Reports the 'Count' attribute of the "retried" command MBean.
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def commandRetriedMetrics(host, port)
    answer = doRequest("http://#{host}:#{port}/v3/metrics/mbean/com.puppetlabs.puppetdb.command:type=global,name=retried")
    failed = answer['returncode'] != 0

    status = {'perfdata' => '', 'returncode' => 0}
    if failed
      status['text'] = answer['text']
      status['returncode'] = answer['returncode']
    else
      retries = answer['data']['Count']
      status['text'] = "retried: #{retries}"
      status['perfdata'] = "\nretried: #{retries}"
    end
    status
  end
  210.  
  # Reads the ActiveMQ queue MBean of the PuppetDB command queue and grades
  # the queue size against the given thresholds.
  #
  # host, port - PuppetDB HTTP endpoint
  # warn, crit - a queue size at or above crit gives CRITICAL (2), at or
  #              above warn gives WARNING (1), anything else OK (0)
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest. The text uses
  # Numeric#humanize for the queue size.
  def queueMetrics(host, port, warn, crit)
    verdict = {'perfdata' => '', 'returncode' => 0}
    reply = doRequest("http://#{host}:#{port}/v3/metrics/mbean/org.apache.activemq:BrokerName=localhost,Type=Queue,Destination=com.puppetlabs.puppetdb.commands")

    if reply['returncode'] != 0
      verdict['text'] = reply['text']
      verdict['returncode'] = reply['returncode']
      return verdict
    end

    backlog = reply['data']['QueueSize']
    consumers = reply['data']['ConsumerCount']

    label, level =
      if backlog >= crit
        ['CRITICAL: ', 2]
      elsif backlog >= warn
        ['WARNING: ', 1]
      else
        ['', 0]
      end

    verdict['text'] = "#{label}Queue size: #{backlog.humanize} \nthreads: #{consumers}"
    verdict['returncode'] = level
    verdict['perfdata'] = "\nqueue_size: #{backlog} \nthreads: #{consumers}"
    verdict
  end
  237.  
  # Reports the "duplicate-pct" storage MBean value multiplied by 100,
  # rounded to one digit in the text and three digits in the perfdata.
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def catalogDuplicatesMetrics(host, port)
    reply = doRequest("http://#{host}:#{port}/v3/metrics/mbean/com.puppetlabs.puppetdb.scf.storage:type=default,name=duplicate-pct")
    findings = {'perfdata' => '', 'returncode' => 0}

    if reply['returncode'] == 0
      percentage = reply['data']['Value'] * 100
      findings.update(
        'text' => "Catalog duplication: #{percentage.round(1)}%",
        'perfdata' => "\ncatalog_duplication: #{percentage.round(3)} %"
      )
    else
      findings.update('text' => reply['text'], 'returncode' => reply['returncode'])
    end

    findings
  end
  252.  
  # Reports the "pct-resource-dupes" population MBean value multiplied by
  # 100, rounded to one digit in the text and three digits in the perfdata.
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def resourceDuplicatesMetrics(host, port)
    answer = doRequest("http://#{host}:#{port}/v3/metrics/mbean/com.puppetlabs.puppetdb.query.population:type=default,name=pct-resource-dupes")
    summary = {'perfdata' => '', 'returncode' => 0}

    unless answer['returncode'] == 0
      summary['text'] = answer['text']
      summary['returncode'] = answer['returncode']
      return summary
    end

    share = answer['data']['Value'] * 100
    summary['text'] = "Resource duplication: #{share.round(1)}%"
    summary['perfdata'] = "\nresource_duplication: #{share.round(3)} %"
    summary
  end
  267.  
  # Reports the 'Value' attribute of the "num-nodes" population MBean.
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def populationNodesMetrics(host, port)
    reply = doRequest("http://#{host}:#{port}/v3/metrics/mbean/com.puppetlabs.puppetdb.query.population:type=default,name=num-nodes")
    tally = {'perfdata' => '', 'returncode' => 0}

    if reply['returncode'] != 0
      tally['text'] = reply['text']
      tally['returncode'] = reply['returncode']
    else
      node_count = reply['data']['Value']
      tally['text'] = "nodes: #{node_count}"
      tally['perfdata'] = "\nnum_nodes: #{node_count}"
    end

    tally
  end
  282.  
  # Reports the 'Value' attribute of the "num-resources" population MBean.
  #
  # host, port - PuppetDB HTTP endpoint
  #
  # Returns a Hash with 'text', 'returncode' and 'perfdata'; when the request
  # failed, text and returncode are taken over from doRequest.
  def populationResourcesMetrics(host, port)
    result = {'perfdata' => '', 'returncode' => 0}
    url = "http://#{host}:#{port}/v3/metrics/mbean/com.puppetlabs.puppetdb.query.population:type=default,name=num-resources"
    data = doRequest(url)
    if data['returncode'] == 0
      num_resources = data['data']['Value']
      # summary label now matches the perfdata label (it used to read "resources1")
      result['text'] = "resources: #{num_resources}"
      result['perfdata'] = "\nresources: #{num_resources}"
    else
      result['text'] = data['text']
      result['returncode'] = data['returncode']
    end
    result
  end
  297.  
  # Collected result hashes of the port probes and of every metric check.
  results = []

  # Check if plain HTTP port is open
  skip_checks = false
  unless is_port_open?($host, $port)
    # skip all metric checks
    skip_checks = true
    results << {'text' => "CRITICAL: Could not connect to plain HTTP port #{$host}:#{$port}", 'returncode' => 2}
  end

  # Check if plain SSL port is open
  unless is_port_open?($host, $sslport)
    # don't skip metric checks, but add CRITICAL result
    results << {'text' => "CRITICAL: Could not connect to SSL port #{$host}:#{$sslport}", 'returncode' => 2}
  end

  unless skip_checks
    # Single list of metric checks, shared by the threaded and the sequential
    # mode so the two cannot drift apart.
    checks = [
      lambda { commandProcessingMetrics($host, $port, $cmd_p_secwarn, $cmd_p_seccrit) },
      lambda { commandProcessedMetrics($host, $port) },
      lambda { commandRetriedMetrics($host, $port) },
      lambda { databaseMetrics($host, $port) },
      lambda { JvmMetrics($host, $port) },
      lambda { queueMetrics($host, $port, $queuewarn, $queuecrit) },
      lambda { catalogDuplicatesMetrics($host, $port) },
      lambda { populationNodesMetrics($host, $port) },
      # I only began querying this after updating to PuppetDB 1.6, otherwise it was too slow
      lambda { populationResourcesMetrics($host, $port) },
      # This is also rather costly (adds more than 2 seconds for me)
      lambda { resourceDuplicatesMetrics($host, $port) }
    ]

    if $debug == false
      # threading: one thread per check. Thread#value waits for the thread and
      # hands back its result, so results are appended in list order no matter
      # which request finishes first.
      threads = checks.map { |check| Thread.new { check.call } }
      threads.each { |thread| results << thread.value }
    else
      # debug mode runs the checks one after another
      checks.each { |check| results << check.call }
    end
  end
  349.  
  puts results if $debug

  # Aggregate check results
  output = {}
  output['returncode'] = 0
  output['text'] = ''
  output['text_if_ok'] = ''
  output['multiline'] = ''
  output['perfdata'] = ''

  # Failure texts are built as "WARNING: Error '<exception message>' while
  # sending request to <url>". A failure counts as a timeout when the quoted
  # message mentions a timeout -- NOTE(review): the exact exception messages
  # depend on the Ruby version, confirm the pattern against real output.
  timeout_failure = /Error '[^']*(?:Timeout|execution expired)/

  # Becomes true as soon as one result is OK or failed for a reason other
  # than a timeout; it is never reset once set.
  puppetdb_still_alive = false

  results.each do |result|
    # the port probe results carry no 'perfdata' key at all
    perfdata = result['perfdata'].to_s
    output['perfdata'] += "#{perfdata} " unless perfdata.empty?

    if result['returncode'] >= 1
      puppetdb_still_alive = true unless result['text'].to_s =~ timeout_failure
      output['text'] += "#{result['text']} "
      # keep the worst return code seen so far
      output['returncode'] = result['returncode'] if result['returncode'] > output['returncode']
    else
      puppetdb_still_alive = true
      output['text_if_ok'] += "#{result['text']} "
      # The per-line long output (with '</br>' appended when $checkmk is set)
      # is currently disabled, so output['multiline'] stays empty.
    end
  end

  # if all check receive a timeout error then the PuppetDB is non functioning
  unless puppetdb_still_alive
    output['text'] = 'CRITICAL: Received only timeout errors, PuppetDB is not responding anymore, try restarting'
    output['returncode'] = 2
  end

  output['text'] = output['text_if_ok'] if output['text'] == ''

  # NOTE(review): only the perfdata is printed and the script always ends with
  # exit status 0; the summary text and output['returncode'] computed above
  # are unused. The earlier variants are kept below for reference -- confirm
  # which behaviour is wanted before re-enabling them.
  #puts "#{output['text']}\n#{output['multiline'].chomp()}|#{output['perfdata']}\n#{output['multiline'].chomp()}"
  puts "#{output['perfdata']}\n#{output['multiline'].chomp()}"

  exit
  #exit output['returncode']
Add Comment
Please, Sign In to add comment