Advertisement
Guest User

Fencing race again in centos6.2 with rhcs_fence

a guest
Feb 3rd, 2012
389
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 27.73 KB | None | 0 0
  1. LOG NODE1
  2. Feb 3 17:48:03 wsguardian1 kernel: e1000e: eth4 NIC Link is Down
  3. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: PingAck did not arrive in time.
  4. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: peer( Primary -> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate -> DUnknown ) susp( 0 -> 1 )
  5. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: asender terminated
  6. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: Terminating asender thread
  7. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: Connection closed
  8. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: conn( NetworkFailure -> Unconnected )
  9. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: receiver terminated
  10. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: Restarting receiver thread
  11. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: receiver (re)started
  12. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: conn( Unconnected -> WFConnection )
  13. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_db: helper command: /sbin/drbdadm fence-peer wsg_db
  14. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: PingAck did not arrive in time.
  15. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: peer( Primary -> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate -> DUnknown ) susp( 0 -> 1 )
  16. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: asender terminated
  17. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: Terminating asender thread
  18. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: Connection closed
  19. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: conn( NetworkFailure -> Unconnected )
  20. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: receiver terminated
  21. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: Restarting receiver thread
  22. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: receiver (re)started
  23. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: conn( Unconnected -> WFConnection )
  24. Feb 3 17:48:13 wsguardian1 kernel: d-con wsg_config: helper command: /sbin/drbdadm fence-peer wsg_config
  25. Feb 3 17:48:13 wsguardian1 rhcs_fence: 125; DEBUG: Checking if: [uname] is at: [/bin/uname]
  26. Feb 3 17:48:13 wsguardian1 rhcs_fence: 156; DEBUG: Found!
  27. Feb 3 17:48:13 wsguardian1 rhcs_fence: 125; DEBUG: Checking if: [fence_node] is at: [/usr/sbin/fence_node]
  28. Feb 3 17:48:13 wsguardian1 rhcs_fence: 156; DEBUG: Found!
  29. Feb 3 17:48:13 wsguardian1 rhcs_fence: 125; DEBUG: Checking if: [cman_tool] is at: [/usr/sbin/cman_tool]
  30. Feb 3 17:48:13 wsguardian1 rhcs_fence: 156; DEBUG: Found!
  31. Feb 3 17:48:13 wsguardian1 rhcs_fence: 74; Attempting to fence peer using RHCS from DRBD...
  32. Feb 3 17:48:13 wsguardian1 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_RESOURCE] == [wsg_config]
  33. Feb 3 17:48:13 wsguardian1 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_MINOR] == [1]
  34. Feb 3 17:48:13 wsguardian1 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_PEERS] == [wsguardian2]
  35. Feb 3 17:48:13 wsguardian1 rhcs_fence: 454; DEBUG: shell call: [/usr/sbin/cman_tool status]
  36. Feb 3 17:48:13 wsguardian1 rhcs_fence: 125; DEBUG: Checking if: [uname] is at: [/bin/uname]
  37. Feb 3 17:48:13 wsguardian1 rhcs_fence: 156; DEBUG: Found!
  38. Feb 3 17:48:13 wsguardian1 rhcs_fence: 125; DEBUG: Checking if: [fence_node] is at: [/usr/sbin/fence_node]
  39. Feb 3 17:48:13 wsguardian1 rhcs_fence: 156; DEBUG: Found!
  40. Feb 3 17:48:13 wsguardian1 rhcs_fence: 125; DEBUG: Checking if: [cman_tool] is at: [/usr/sbin/cman_tool]
  41. Feb 3 17:48:13 wsguardian1 rhcs_fence: 156; DEBUG: Found!
  42. Feb 3 17:48:13 wsguardian1 rhcs_fence: 74; Attempting to fence peer using RHCS from DRBD...
  43. Feb 3 17:48:13 wsguardian1 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_RESOURCE] == [wsg_db]
  44. Feb 3 17:48:13 wsguardian1 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_MINOR] == [0]
  45. Feb 3 17:48:13 wsguardian1 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_PEERS] == [wsguardian2]
  46. Feb 3 17:48:13 wsguardian1 rhcs_fence: 454; DEBUG: shell call: [/usr/sbin/cman_tool status]
  47. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Version: 6.2.0
  48. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Config Version: 2
  49. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Name: WSGClust
  50. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Id: 30485
  51. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Member: Yes
  52. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Generation: 28
  53. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Membership state: Cluster-Member
  54. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Nodes: 2
  55. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Expected votes: 1
  56. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Total votes: 2
  57. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Version: 6.2.0
  58. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Node votes: 1
  59. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Config Version: 2
  60. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Quorum: 1
  61. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Active subsystems: 7
  62. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Name: WSGClust
  63. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Flags: 2node
  64. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Id: 30485
  65. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Ports Bound: 0
  66. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Member: Yes
  67. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Node name: wsguardian1
  68. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Cluster Generation: 28
  69. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Membership state: Cluster-Member
  70. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Nodes: 2
  71. Feb 3 17:48:13 wsguardian1 rhcs_fence: 469; DEBUG: Attempt to get local node name via 'cman_tool status' exited with: [0]
  72. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Expected votes: 1
  73. Feb 3 17:48:13 wsguardian1 rhcs_fence: 471; DEBUG: I am: [wsguardian1]
  74. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Total votes: 2
  75. Feb 3 17:48:13 wsguardian1 rhcs_fence: 403; DEBUG: Checking the state of resource with minor number: [1]
  76. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Node votes: 1
  77. Feb 3 17:48:13 wsguardian1 rhcs_fence: 409; DEBUG: minor: [1]
  78. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Quorum: 1
  79. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Active subsystems: 7
  80. Feb 3 17:48:13 wsguardian1 rhcs_fence: 412; DEBUG: shell call: [</proc/drbd]
  81. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Flags: 2node
  82. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Ports Bound: 0
  83. Feb 3 17:48:13 wsguardian1 rhcs_fence: 460; DEBUG: output: Node name: wsguardian1
  84. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: version: 8.4.1 (api:1/proto:86-100)
  85. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by dag@Build64R6, 2011-12-21 06:08:50
  86. Feb 3 17:48:13 wsguardian1 rhcs_fence: 469; DEBUG: Attempt to get local node name via 'cman_tool status' exited with: [0]
  87. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: 0: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  88. Feb 3 17:48:13 wsguardian1 rhcs_fence: 471; DEBUG: I am: [wsguardian1]
  89. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  90. Feb 3 17:48:13 wsguardian1 rhcs_fence: 403; DEBUG: Checking the state of resource with minor number: [0]
  91. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: 1: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  92. Feb 3 17:48:13 wsguardian1 rhcs_fence: 409; DEBUG: minor: [0]
  93. Feb 3 17:48:13 wsguardian1 rhcs_fence: 423; DEBUG: read state of minor: [1] as: [UpToDate]
  94. Feb 3 17:48:13 wsguardian1 rhcs_fence: 412; DEBUG: shell call: [</proc/drbd]
  95. Feb 3 17:48:13 wsguardian1 rhcs_fence: 425; DEBUG: sys::local_res_uptodate: [1]
  96. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  97. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: version: 8.4.1 (api:1/proto:86-100)
  98. Feb 3 17:48:13 wsguardian1 rhcs_fence: 430; DEBUG: Attempt to collect UpToDate information device with minor: [1] exited with: [0]
  99. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by dag@Build64R6, 2011-12-21 06:08:50
  100. Feb 3 17:48:13 wsguardian1 rhcs_fence: 432; DEBUG: UpToDate: [1]
  101. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: 0: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  102. Feb 3 17:48:13 wsguardian1 rhcs_fence: 367; DEBUG: shell call: [/usr/sbin/cman_tool -a -F id,name,type,addr nodes]
  103. Feb 3 17:48:13 wsguardian1 rhcs_fence: 423; DEBUG: read state of minor: [0] as: [UpToDate]
  104. Feb 3 17:48:13 wsguardian1 rhcs_fence: 425; DEBUG: sys::local_res_uptodate: [1]
  105. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  106. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: 1: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  107. Feb 3 17:48:13 wsguardian1 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  108. Feb 3 17:48:13 wsguardian1 rhcs_fence: 430; DEBUG: Attempt to collect UpToDate information device with minor: [0] exited with: [0]
  109. Feb 3 17:48:13 wsguardian1 rhcs_fence: 432; DEBUG: UpToDate: [1]
  110. Feb 3 17:48:13 wsguardian1 rhcs_fence: 367; DEBUG: shell call: [/usr/sbin/cman_tool -a -F id,name,type,addr nodes]
  111. Feb 3 17:48:13 wsguardian1 rhcs_fence: 374; DEBUG: id: [1], name: [wsguardian1], member: [M], address: [192.168.201.43]
  112. Feb 3 17:48:13 wsguardian1 rhcs_fence: 380; DEBUG: output: 1 wsguardian1 M 192.168.201.43
  113. Feb 3 17:48:13 wsguardian1 rhcs_fence: 374; DEBUG: id: [2], name: [wsguardian2], member: [M], address: [192.168.201.180]
  114. Feb 3 17:48:13 wsguardian1 rhcs_fence: 380; DEBUG: output: 2 wsguardian2 M 192.168.201.180
  115. Feb 3 17:48:13 wsguardian1 rhcs_fence: 384; DEBUG: Attempt to gather cluster member information exited with: [0]
  116. Feb 3 17:48:13 wsguardian1 rhcs_fence: 374; DEBUG: id: [1], name: [wsguardian1], member: [M], address: [192.168.201.43]
  117. Feb 3 17:48:13 wsguardian1 rhcs_fence: 343; I have identified my target: [wsguardian2]
  118. Feb 3 17:48:13 wsguardian1 rhcs_fence: 380; DEBUG: output: 1 wsguardian1 M 192.168.201.43
  119. Feb 3 17:48:13 wsguardian1 rhcs_fence: 191; DEBUG: I am: [wsguardian1] and my id is: [1]
  120. Feb 3 17:48:13 wsguardian1 rhcs_fence: 374; DEBUG: id: [2], name: [wsguardian2], member: [M], address: [192.168.201.180]
  121. Feb 3 17:48:13 wsguardian1 rhcs_fence: 380; DEBUG: output: 2 wsguardian2 M 192.168.201.180
  122. Feb 3 17:48:13 wsguardian1 rhcs_fence: 223; I am the first node, so I won't delay.
  123. Feb 3 17:48:13 wsguardian1 rhcs_fence: 301; Target node: [wsguardian2] is a cluster member, attempting to eject.
  124. Feb 3 17:48:13 wsguardian1 rhcs_fence: 384; DEBUG: Attempt to gather cluster member information exited with: [0]
  125. Feb 3 17:48:13 wsguardian1 rhcs_fence: 304; DEBUG: shell call: [/usr/sbin/cman_tool kill -f wsguardian2]
  126. Feb 3 17:48:13 wsguardian1 rhcs_fence: 343; I have identified my target: [wsguardian2]
  127. Feb 3 17:48:13 wsguardian1 rhcs_fence: 191; DEBUG: I am: [wsguardian1] and my id is: [1]
  128. Feb 3 17:48:13 wsguardian1 rhcs_fence: 223; I am the first node, so I won't delay.
  129. Feb 3 17:48:13 wsguardian1 rhcs_fence: 301; Target node: [wsguardian2] is a cluster member, attempting to eject.
  130. Feb 3 17:48:13 wsguardian1 rhcs_fence: 304; DEBUG: shell call: [/usr/sbin/cman_tool kill -f wsguardian2]
  131. Feb 3 17:48:13 wsguardian1 rhcs_fence: 313; DEBUG: Attempt to force-remove node: [wsguardian2] exited with: [256]
  132. Feb 3 17:48:13 wsguardian1 rhcs_fence: 243; Fencing target: [wsguardian2]...
  133. Feb 3 17:48:13 wsguardian1 rhcs_fence: 247; DEBUG: shell call: [/usr/sbin/fence_node -v wsguardian2]
  134. Feb 3 17:48:13 wsguardian1 rhcs_fence: 313; DEBUG: Attempt to force-remove node: [wsguardian2] exited with: [256]
  135. Feb 3 17:48:13 wsguardian1 rhcs_fence: 243; Fencing target: [wsguardian2]...
  136. Feb 3 17:48:13 wsguardian1 rhcs_fence: 247; DEBUG: shell call: [/usr/sbin/fence_node -v wsguardian2]
  137.  
  138. LOG NODE2
  139.  
  140. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: PingAck did not arrive in time.
  141. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: peer( Primary -> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate -> DUnknown ) susp( 0 -> 1 )
  142. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: asender terminated
  143. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: Terminating asender thread
  144. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: Connection closed
  145. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: conn( NetworkFailure -> Unconnected )
  146. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: receiver terminated
  147. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: Restarting receiver thread
  148. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: receiver (re)started
  149. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: conn( Unconnected -> WFConnection )
  150. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_db: helper command: /sbin/drbdadm fence-peer wsg_db
  151. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: PingAck did not arrive in time.
  152. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: peer( Primary -> Unknown ) conn( Connected -> NetworkFailure ) pdsk( UpToDate -> DUnknown ) susp( 0 -> 1 )
  153. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: asender terminated
  154. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: Terminating asender thread
  155. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: Connection closed
  156. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: conn( NetworkFailure -> Unconnected )
  157. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: receiver terminated
  158. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: Restarting receiver thread
  159. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: receiver (re)started
  160. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: conn( Unconnected -> WFConnection )
  161. Feb 3 17:48:13 wsguardian2 kernel: d-con wsg_config: helper command: /sbin/drbdadm fence-peer wsg_config
  162. Feb 3 17:48:13 wsguardian2 rhcs_fence: 125; DEBUG: Checking if: [uname] is at: [/bin/uname]
  163. Feb 3 17:48:13 wsguardian2 rhcs_fence: 156; DEBUG: Found!
  164. Feb 3 17:48:13 wsguardian2 rhcs_fence: 125; DEBUG: Checking if: [fence_node] is at: [/usr/sbin/fence_node]
  165. Feb 3 17:48:13 wsguardian2 rhcs_fence: 156; DEBUG: Found!
  166. Feb 3 17:48:13 wsguardian2 rhcs_fence: 125; DEBUG: Checking if: [cman_tool] is at: [/usr/sbin/cman_tool]
  167. Feb 3 17:48:13 wsguardian2 rhcs_fence: 156; DEBUG: Found!
  168. Feb 3 17:48:13 wsguardian2 rhcs_fence: 74; Attempting to fence peer using RHCS from DRBD...
  169. Feb 3 17:48:13 wsguardian2 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_RESOURCE] == [wsg_config]
  170. Feb 3 17:48:13 wsguardian2 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_MINOR] == [1]
  171. Feb 3 17:48:13 wsguardian2 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_PEERS] == [wsguardian1]
  172. Feb 3 17:48:13 wsguardian2 rhcs_fence: 454; DEBUG: shell call: [/usr/sbin/cman_tool status]
  173. Feb 3 17:48:13 wsguardian2 rhcs_fence: 125; DEBUG: Checking if: [uname] is at: [/bin/uname]
  174. Feb 3 17:48:13 wsguardian2 rhcs_fence: 156; DEBUG: Found!
  175. Feb 3 17:48:13 wsguardian2 rhcs_fence: 125; DEBUG: Checking if: [fence_node] is at: [/usr/sbin/fence_node]
  176. Feb 3 17:48:13 wsguardian2 rhcs_fence: 156; DEBUG: Found!
  177. Feb 3 17:48:13 wsguardian2 rhcs_fence: 125; DEBUG: Checking if: [cman_tool] is at: [/usr/sbin/cman_tool]
  178. Feb 3 17:48:13 wsguardian2 rhcs_fence: 156; DEBUG: Found!
  179. Feb 3 17:48:13 wsguardian2 rhcs_fence: 74; Attempting to fence peer using RHCS from DRBD...
  180. Feb 3 17:48:13 wsguardian2 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_RESOURCE] == [wsg_db]
  181. Feb 3 17:48:13 wsguardian2 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_MINOR] == [0]
  182. Feb 3 17:48:13 wsguardian2 rhcs_fence: 80; DEBUG: Environment variable: [DRBD_PEERS] == [wsguardian1]
  183. Feb 3 17:48:13 wsguardian2 rhcs_fence: 454; DEBUG: shell call: [/usr/sbin/cman_tool status]
  184. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Version: 6.2.0
  185. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Config Version: 2
  186. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Name: WSGClust
  187. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Id: 30485
  188. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Member: Yes
  189. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Generation: 28
  190. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Membership state: Cluster-Member
  191. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Nodes: 2
  192. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Version: 6.2.0
  193. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Expected votes: 1
  194. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Total votes: 2
  195. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Config Version: 2
  196. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Node votes: 1
  197. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Name: WSGClust
  198. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Quorum: 1
  199. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Id: 30485
  200. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Active subsystems: 7
  201. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Member: Yes
  202. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Flags: 2node
  203. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Cluster Generation: 28
  204. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Membership state: Cluster-Member
  205. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Ports Bound: 0
  206. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Nodes: 2
  207. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Node name: wsguardian2
  208. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Expected votes: 1
  209. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Total votes: 2
  210. Feb 3 17:48:13 wsguardian2 rhcs_fence: 469; DEBUG: Attempt to get local node name via 'cman_tool status' exited with: [0]
  211. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Node votes: 1
  212. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Quorum: 1
  213. Feb 3 17:48:13 wsguardian2 rhcs_fence: 471; DEBUG: I am: [wsguardian2]
  214. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Active subsystems: 7
  215. Feb 3 17:48:13 wsguardian2 rhcs_fence: 403; DEBUG: Checking the state of resource with minor number: [1]
  216. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Flags: 2node
  217. Feb 3 17:48:13 wsguardian2 rhcs_fence: 409; DEBUG: minor: [1]
  218. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Ports Bound: 0
  219. Feb 3 17:48:13 wsguardian2 rhcs_fence: 460; DEBUG: output: Node name: wsguardian2
  220. Feb 3 17:48:13 wsguardian2 rhcs_fence: 469; DEBUG: Attempt to get local node name via 'cman_tool status' exited with: [0]
  221. Feb 3 17:48:13 wsguardian2 rhcs_fence: 412; DEBUG: shell call: [</proc/drbd]
  222. Feb 3 17:48:13 wsguardian2 rhcs_fence: 471; DEBUG: I am: [wsguardian2]
  223. Feb 3 17:48:13 wsguardian2 rhcs_fence: 403; DEBUG: Checking the state of resource with minor number: [0]
  224. Feb 3 17:48:13 wsguardian2 rhcs_fence: 409; DEBUG: minor: [0]
  225. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: version: 8.4.1 (api:1/proto:86-100)
  226. Feb 3 17:48:13 wsguardian2 rhcs_fence: 412; DEBUG: shell call: [</proc/drbd]
  227. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by dag@Build64R6, 2011-12-21 06:08:50
  228. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: version: 8.4.1 (api:1/proto:86-100)
  229. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: 0: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  230. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  231. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by dag@Build64R6, 2011-12-21 06:08:50
  232. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: 1: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  233. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: 0: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  234. Feb 3 17:48:13 wsguardian2 rhcs_fence: 423; DEBUG: read state of minor: [1] as: [UpToDate]
  235. Feb 3 17:48:13 wsguardian2 rhcs_fence: 425; DEBUG: sys::local_res_uptodate: [1]
  236. Feb 3 17:48:13 wsguardian2 rhcs_fence: 423; DEBUG: read state of minor: [0] as: [UpToDate]
  237. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  238. Feb 3 17:48:13 wsguardian2 rhcs_fence: 425; DEBUG: sys::local_res_uptodate: [1]
  239. Feb 3 17:48:13 wsguardian2 rhcs_fence: 430; DEBUG: Attempt to collect UpToDate information device with minor: [1] exited with: [0]
  240. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  241. Feb 3 17:48:13 wsguardian2 rhcs_fence: 432; DEBUG: UpToDate: [1]
  242. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: 1: cs:WFConnection ro:Primary/Unknown ds:UpToDate/DUnknown C s-----
  243. Feb 3 17:48:13 wsguardian2 rhcs_fence: 418; DEBUG: output: ns:0 nr:0 dw:0 dr:664 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
  244. Feb 3 17:48:13 wsguardian2 rhcs_fence: 367; DEBUG: shell call: [/usr/sbin/cman_tool -a -F id,name,type,addr nodes]
  245. Feb 3 17:48:13 wsguardian2 rhcs_fence: 430; DEBUG: Attempt to collect UpToDate information device with minor: [0] exited with: [0]
  246. Feb 3 17:48:13 wsguardian2 rhcs_fence: 432; DEBUG: UpToDate: [1]
  247. Feb 3 17:48:13 wsguardian2 rhcs_fence: 367; DEBUG: shell call: [/usr/sbin/cman_tool -a -F id,name,type,addr nodes]
  248. Feb 3 17:48:13 wsguardian2 rhcs_fence: 374; DEBUG: id: [1], name: [wsguardian1], member: [M], address: [192.168.201.43]
  249. Feb 3 17:48:13 wsguardian2 rhcs_fence: 380; DEBUG: output: 1 wsguardian1 M 192.168.201.43
  250. Feb 3 17:48:13 wsguardian2 rhcs_fence: 374; DEBUG: id: [2], name: [wsguardian2], member: [M], address: [192.168.201.180]
  251. Feb 3 17:48:13 wsguardian2 rhcs_fence: 380; DEBUG: output: 2 wsguardian2 M 192.168.201.180
  252. Feb 3 17:48:13 wsguardian2 rhcs_fence: 384; DEBUG: Attempt to gather cluster member information exited with: [0]
  253. Feb 3 17:48:13 wsguardian2 rhcs_fence: 343; I have identified my target: [wsguardian1]
  254. Feb 3 17:48:13 wsguardian2 rhcs_fence: 374; DEBUG: id: [1], name: [wsguardian1], member: [M], address: [192.168.201.43]
  255. Feb 3 17:48:13 wsguardian2 rhcs_fence: 191; DEBUG: I am: [wsguardian2] and my id is: [2]
  256. Feb 3 17:48:13 wsguardian2 rhcs_fence: 380; DEBUG: output: 1 wsguardian1 M 192.168.201.43
  257. Feb 3 17:48:13 wsguardian2 rhcs_fence: 217; Delaying for: [9] seconds to avoid dual-fencing...
  258. Feb 3 17:48:13 wsguardian2 rhcs_fence: 374; DEBUG: id: [2], name: [wsguardian2], member: [M], address: [192.168.201.180]
  259. Feb 3 17:48:13 wsguardian2 rhcs_fence: 380; DEBUG: output: 2 wsguardian2 M 192.168.201.180
  260. Feb 3 17:48:13 wsguardian2 rhcs_fence: 384; DEBUG: Attempt to gather cluster member information exited with: [0]
  261. Feb 3 17:48:13 wsguardian2 rhcs_fence: 343; I have identified my target: [wsguardian1]
  262. Feb 3 17:48:13 wsguardian2 rhcs_fence: 191; DEBUG: I am: [wsguardian2] and my id is: [2]
  263. Feb 3 17:48:13 wsguardian2 rhcs_fence: 217; Delaying for: [9] seconds to avoid dual-fencing...
  264. Feb 3 17:48:22 wsguardian2 rhcs_fence: 219; DEBUG: Right then, break over.
  265. Feb 3 17:48:22 wsguardian2 rhcs_fence: 301; Target node: [wsguardian1] is a cluster member, attempting to eject.
  266. Feb 3 17:48:22 wsguardian2 rhcs_fence: 219; DEBUG: Right then, break over.
  267. Feb 3 17:48:22 wsguardian2 rhcs_fence: 304; DEBUG: shell call: [/usr/sbin/cman_tool kill -f wsguardian1]
  268. Feb 3 17:48:22 wsguardian2 rhcs_fence: 301; Target node: [wsguardian1] is a cluster member, attempting to eject.
  269. Feb 3 17:48:22 wsguardian2 rhcs_fence: 304; DEBUG: shell call: [/usr/sbin/cman_tool kill -f wsguardian1]
  270. Feb 3 17:48:22 wsguardian2 rhcs_fence: 313; DEBUG: Attempt to force-remove node: [wsguardian1] exited with: [256]
  271. Feb 3 17:48:22 wsguardian2 rhcs_fence: 243; Fencing target: [wsguardian1]...
  272. Feb 3 17:48:22 wsguardian2 rhcs_fence: 313; DEBUG: Attempt to force-remove node: [wsguardian1] exited with: [256]
  273. Feb 3 17:48:22 wsguardian2 rhcs_fence: 247; DEBUG: shell call: [/usr/sbin/fence_node -v wsguardian1]
  274. Feb 3 17:48:22 wsguardian2 rhcs_fence: 243; Fencing target: [wsguardian1]...
  275. Feb 3 17:48:22 wsguardian2 rhcs_fence: 247; DEBUG: shell call: [/usr/sbin/fence_node -v wsguardian1]
  276.  
  277.  
  278.  
  279. cluster.conf
  280. <?xml version="1.0"?>
  281. <cluster config_version="2" name="WSGClust">
  282. <cman expected_votes="1" two_node="1"/>
  283. <clusternodes>
  284. <clusternode name="wsguardian1" nodeid="1">
  285. <fence>
  286. <method name="wsguardian1_ipmi">
  287. <device name="ipmi1" action="reboot"/>
  288. </method>
  289. </fence>
  290. </clusternode>
  291. <clusternode name="wsguardian2" nodeid="2">
  292. <fence>
  293. <method name="wsguardian2_ipmi">
  294. <device name="ipmi2" action="reboot"/>
  295. </method>
  296. </fence>
  297. </clusternode>
  298. </clusternodes>
  299. <fencedevices>
  300. <fencedevice agent="fence_ipmilan" ipaddr="192.168.201.220" lanplus="1" login="ADMIN" name="ipmi1" passwd="easy"/>
  301. <fencedevice agent="fence_ipmilan" ipaddr="192.168.201.186" lanplus="1" login="ADMIN" name="ipmi2" passwd="easy"/>
  302. </fencedevices>
  303. <fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="30"/>
  304. </cluster>
  305.  
  306. drbd.conf
  307. global {
  308. usage-count no;
  309. }
  310. common {
  311. protocol C;
  312.  
  313. syncer {
  314. rate 100M;
  315. al-extents 3389;
  316. }
  317.  
  318. # This tells DRBD what policy to use when a fence is required.
  319. disk {
  320.  
  321. fencing resource-and-stonith;
  322. }
  323.  
  324. handlers {
  325.  
  326. #outdate-peer "/sbin/obliterate-peer.sh";
  327. outdate-peer "/usr/sbin/rhcs_fence";
  328.  
  329. split-brain "/usr/lib/drbd/notify-split-brain.sh kortux@gmail.com";
  330.  
  331. }
  332.  
  333. net {
  334. sndbuf-size 1024k;
  335. # Tell DRBD to allow dual-primary.
  336. allow-two-primaries;
  337.  
  338. # Set the recovery policy for split-brain recovery when no device
  339. # in the resource was primary.
  340. after-sb-0pri discard-zero-changes;
  341.  
  342. # Now if one device was primary.
  343. after-sb-1pri discard-secondary;
  344.  
  345. # Finally, set the policy when both nodes were Primary. The
  346. # only viable option is 'disconnect', which tells DRBD to
  347. # simply tear down the DRBD resource right away and wait for
  348. # the administrator to manually invalidate one side of the
  349. # resource.
  350. after-sb-2pri disconnect;
  351. }
  352. startup {
  353. become-primary-on both;
  354. }
  355.  
  356. }
  357. resource wsg_db {
  358. device /dev/drbd0;
  359. meta-disk internal;
  360.  
  361.  
  362. on wsguardian1 {
  363. address 192.168.253.1:7788;
  364. disk /dev/rootvg/wsg_data_lv;
  365. }
  366.  
  367. on wsguardian2 {
  368. address 192.168.253.2:7788;
  369. disk /dev/rootvg/wsg_data_lv;
  370. }
  371. }
  372.  
  373. resource wsg_config {
  374. device /dev/drbd1;
  375. meta-disk internal;
  376.  
  377. on wsguardian1 {
  378. address 192.168.253.1:7789;
  379. disk /dev/rootvg/wsg_config_lv;
  380. }
  381.  
  382. on wsguardian2 {
  383. address 192.168.253.2:7789;
  384. disk /dev/rootvg/wsg_config_lv;
  385. }
  386. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement