node02 gmond.conf

/* This configuration is as close to 2.5.x default behavior as possible.
   The values closely match ./gmond/metric.h definitions in 2.5.x */
globals {
  daemonize = yes
  setuid = yes
  user = ganglia
  debug_level = 0
  max_udp_msg_len = 1472
  mute = no
  deaf = no
  host_dmax = 0 /* secs */
  cleanup_threshold = 300 /* secs */
  gexec = no
  send_metadata_interval = 0
}
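
/* A note on send_metadata_interval: 0 means metric metadata is sent only once, when
   gmond starts, which is fine for multicast since peers can request it again. In a
   unicast setup an aggregator restarted later may show no metrics until the senders
   are restarted, so a periodic resend is the usual advice, e.g. (illustrative value):

   send_metadata_interval = 30
*/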

/* If a cluster attribute is specified, then all gmond hosts are wrapped inside
 * of a <CLUSTER> tag. If you do not specify a cluster tag, then all <HOSTS> will
 * NOT be wrapped inside of a <CLUSTER> tag. */
cluster {
  name = "CPU cluster"
  owner = "Property of FBM"
  latlong = "unspecified"
  url = "unspecified"
}

/* The host section describes attributes of the host, like the location */
host {
  location = "suport.informatica@example.org"
}

/* Feel free to specify as many udp_send_channels as you like. Gmond
   used to only support having a single channel. */
udp_send_channel {
  mcast_join = 239.2.11.71
  port = 8649
  ttl = 1
}
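
/* For reference, a unicast alternative replaces mcast_join with a host attribute
   (the receiver name here is hypothetical); ttl only matters for multicast:

udp_send_channel {
  host = gmetad.example.org
  port = 8649
}
*/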

/* You can specify as many udp_recv_channels as you like as well. */
udp_recv_channel {
  mcast_join = 239.2.11.71
  port = 8649
  bind = 239.2.11.71
}
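
/* The matching unicast receive channel simply omits the multicast attributes and
   listens on all interfaces:

udp_recv_channel {
  port = 8649
}
*/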

/* You can specify as many tcp_accept_channels as you like to share
   an XML description of the state of the cluster. */
tcp_accept_channel {
  port = 8649
}
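
/* Any host that can reach this port gets the cluster state dumped as XML, which
   makes for a quick sanity check after a restart, e.g.:

   telnet node02 8649
*/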

/* Each metrics module that is referenced by gmond must be specified and
   loaded. If the module has been statically linked with gmond, it does not
   require a load path. However, all dynamically loadable modules must include
   a load path. */
modules {
  module {
    name = "core_metrics"
  }
  module {
    name = "cpu_module"
    path = "/usr/lib/ganglia/modcpu.so"
  }
  module {
    name = "disk_module"
    path = "/usr/lib/ganglia/moddisk.so"
  }
  module {
    name = "load_module"
    path = "/usr/lib/ganglia/modload.so"
  }
  module {
    name = "mem_module"
    path = "/usr/lib/ganglia/modmem.so"
  }
  module {
    name = "net_module"
    path = "/usr/lib/ganglia/modnet.so"
  }
  module {
    name = "proc_module"
    path = "/usr/lib/ganglia/modproc.so"
  }
  module {
    name = "sys_module"
    path = "/usr/lib/ganglia/modsys.so"
  }
}
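
/* Modules can also take parameters. A sketch modeled on the example module that
   ships with the gmond source (the module and parameter names here are illustrative,
   not ones loaded above):

module {
  name = "example_module"
  path = "modexample.so"
  params = "a raw string passed to the module as a whole"
  param RandomMax {
    value = 75
  }
}
*/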

include ('/etc/ganglia/conf.d/*.conf')
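
/* The include above pulls in every *.conf under /etc/ganglia/conf.d/, so local
   additions can live in their own drop-in files instead of this one. A hypothetical
   /etc/ganglia/conf.d/extra.conf could hold, say, another collection group using
   the same syntax as the groups below (metric chosen purely for illustration):

collection_group {
  collect_every = 60
  time_threshold = 300
  metric {
    name = "part_max_used"
    title = "Maximum Disk Space Used"
  }
}
*/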

/* The old internal 2.5.x metric array has been replaced by the following
   collection_group directives. What follows is the default behavior for
   collecting and sending metrics that is as close to 2.5.x behavior as
   possible. */
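
/* How these directives interact, briefly: collect_every is the sampling interval,
   time_threshold is the maximum time between sends, and value_threshold forces an
   early send when a sample changes by more than the given amount; collect_once = yes
   samples a single time at startup instead. So a group such as

collection_group {
  collect_every = 20
  time_threshold = 90
  metric {
    name = "cpu_user"
    value_threshold = "1.0"
  }
}

   samples cpu_user every 20 seconds and sends it at least every 90, sooner if it
   moves by more than 1.0 between samples. */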

/* This collection group will cause a heartbeat (or beacon) to be sent every
   20 seconds. The heartbeat carries the GMOND_STARTED data, which expresses
   the age of the running gmond. */
collection_group {
  collect_once = yes
  time_threshold = 20
  metric {
    name = "heartbeat"
  }
}

/* This collection group will send general info about this host every 1200 secs.
   This information doesn't change between reboots and is only collected once. */
collection_group {
  collect_once = yes
  time_threshold = 1200
  metric {
    name = "cpu_num"
    title = "CPU Count"
  }
  metric {
    name = "cpu_speed"
    title = "CPU Speed"
  }
  metric {
    name = "mem_total"
    title = "Memory Total"
  }
  /* Should this be here? Swap can be added/removed between reboots. */
  metric {
    name = "swap_total"
    title = "Swap Space Total"
  }
  metric {
    name = "boottime"
    title = "Last Boot Time"
  }
  metric {
    name = "machine_type"
    title = "Machine Type"
  }
  metric {
    name = "os_name"
    title = "Operating System"
  }
  metric {
    name = "os_release"
    title = "Operating System Release"
  }
  metric {
    name = "location"
    title = "Location"
  }
}

/* This collection group will send the status of gexecd for this host every 300 secs. */
/* Unlike 2.5.x, the default behavior is to report gexecd OFF. */
collection_group {
  collect_once = yes
  time_threshold = 300
  metric {
    name = "gexec"
    title = "Gexec Status"
  }
}

/* This collection group will collect the CPU status info every 20 secs.
   The time threshold is set to 90 seconds. In honesty, this time_threshold could be
   set significantly higher to reduce unnecessary network chatter (see the note
   after this group). */
collection_group {
  collect_every = 20
  time_threshold = 90
  /* CPU status */
  metric {
    name = "cpu_user"
    value_threshold = "1.0"
    title = "CPU User"
  }
  metric {
    name = "cpu_system"
    value_threshold = "1.0"
    title = "CPU System"
  }
  metric {
    name = "cpu_idle"
    value_threshold = "5.0"
    title = "CPU Idle"
  }
  metric {
    name = "cpu_nice"
    value_threshold = "1.0"
    title = "CPU Nice"
  }
  metric {
    name = "cpu_aidle"
    value_threshold = "5.0"
    title = "CPU aidle"
  }
  metric {
    name = "cpu_wio"
    value_threshold = "1.0"
    title = "CPU wio"
  }
  /* The next two metrics are optional, if you want more detail, since they are
     already accounted for in cpu_system:
  metric {
    name = "cpu_intr"
    value_threshold = "1.0"
    title = "CPU intr"
  }
  metric {
    name = "cpu_sintr"
    value_threshold = "1.0"
    title = "CPU sintr"
  }
  */
}
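
/* Per the note above this group: on a quiet cluster the same metrics can be sent far
   less often simply by raising time_threshold (e.g. time_threshold = 300, an
   illustrative value, not a tuned recommendation); value_threshold still forces an
   early send on large swings. */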

collection_group {
  collect_every = 20
  time_threshold = 90
  /* Load Averages */
  metric {
    name = "load_one"
    value_threshold = "1.0"
    title = "One Minute Load Average"
  }
  metric {
    name = "load_five"
    value_threshold = "1.0"
    title = "Five Minute Load Average"
  }
  metric {
    name = "load_fifteen"
    value_threshold = "1.0"
    title = "Fifteen Minute Load Average"
  }
}

/* This group collects the number of running and total processes */
collection_group {
  collect_every = 80
  time_threshold = 950
  metric {
    name = "proc_run"
    value_threshold = "1.0"
    title = "Total Running Processes"
  }
  metric {
    name = "proc_total"
    value_threshold = "1.0"
    title = "Total Processes"
  }
}

/* This collection group grabs the volatile memory metrics every 40 secs and
   sends them at least every 180 secs. This time_threshold can be increased
   significantly to reduce unneeded network traffic. */
collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "mem_free"
    value_threshold = "1024.0"
    title = "Free Memory"
  }
  metric {
    name = "mem_shared"
    value_threshold = "1024.0"
    title = "Shared Memory"
  }
  metric {
    name = "mem_buffers"
    value_threshold = "1024.0"
    title = "Memory Buffers"
  }
  metric {
    name = "mem_cached"
    value_threshold = "1024.0"
    title = "Cached Memory"
  }
  metric {
    name = "swap_free"
    value_threshold = "1024.0"
    title = "Free Swap Space"
  }
}

collection_group {
  collect_every = 40
  time_threshold = 300
  metric {
    name = "bytes_out"
    value_threshold = 4096
    title = "Bytes Sent"
  }
  metric {
    name = "bytes_in"
    value_threshold = 4096
    title = "Bytes Received"
  }
  metric {
    name = "pkts_in"
    value_threshold = 256
    title = "Packets Received"
  }
  metric {
    name = "pkts_out"
    value_threshold = 256
    title = "Packets Sent"
  }
}

/* Different from the 2.5.x default, since the old config made no sense */
collection_group {
  collect_every = 1800
  time_threshold = 3600
  metric {
    name = "disk_total"
    value_threshold = 1.0
    title = "Total Disk Space"
  }
}

collection_group {
  collect_every = 40
  time_threshold = 180
  metric {
    name = "disk_free"
    value_threshold = 1.0
    title = "Disk Space Available"
  }
  metric {
    name = "part_max_used"
    value_threshold = 1.0
    title = "Maximum Disk Space Used"
  }
}
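
/* After editing, restart gmond on node02 for the changes to take effect; the service
   name varies by distribution (commonly gmond or ganglia-monitor), e.g.:

   /etc/init.d/ganglia-monitor restart
*/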