Advertisement
Guest User

vrabie_mica network failover control script

a guest
Mar 17th, 2025
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.32 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. LOCKF=/run/isp-failover
  4. if ! lockfile-create -r1 $LOCKF; then exit; fi
  5.  
  6. # REPS*WAIT must be < 60sec for once/min crontab execution
  7. REPS=4
  8. WAIT=14
  9.  
  10. WAN1=eth0.5
  11. WAN2=eth0.4
  12. # WAN2=wwan0_1
  13.  
  14. NO_REVERT=/tmp/no-revert
  15. NO_SENIN=/tmp/no-senin
  16.  
  17. WAN1_GW=`</run/DGW`
  18. WAN2_GW=`</run/LGW`
  19.  
  20. TEST_LIST="8.8.8.8 1.0.0.1 9.9.9.9 208.67.222.222 192.5.5.241 64.79.96.12 198.205.126.25"
  21.  
  22. # how many test points must be unreachable to declare an outage
  23. TRIGGER=4
  24.  
  25. # probe fewer test points when on metered cellular
  26. CELLBK_TEST="8.8.8.8 1.0.0.1 208.67.222.222"
  27. CELLBK_TRIGGER=3
  28. CELLBK_REVERT_DELAY=3
  29.  
  30. REVERT_NORMAL_MIN=10
  31. REVERT_NORMAL_PPS=20
  32. TRAFFIC_WINDOW=3
  33.  
  34. FAILOVER_TABLES="254 10 20 250"
  35. FAILOVER_TUNNELS="tun25 tun11 tun41 tun3 tun1 tun0 tun6 tun2 tun4 tun8" # priority order
  36.  
  37. CELLFAIL_TABLES="254 10 20" # skip 250; never route Smart TVs via metered LTE
  38. CELL_TUNNEL=tun3
  39.  
  40. PREFER_WAN1_TUNNELS="tun11 tun25 tun41 tun3 tun1 tun6 tun4 tun8"
  41. PREFER_WAN2_TUNNELS="tun2 tun0"
  42. PREFER_WAN1_TABLES="254 10 20"
  43. PREFER_WAN2_TABLES="250"
  44. PREFER_VOIP=$WAN1
  45.  
  46. PREFER_SENIN=$WAN2
  47.  
  48. OVPN=/etc/openvpn/imp
  49. ON_CELLBK=/tmp/.on-lte
  50. WAN1_DOWN=/run/.wan1-down
  51. WAN2_DOWN=/run/.wan2-down
  52. NORMAL=/run/.uplinks-normal
  53. BACK_TO_NORMAL=/run/.uplinks-back
  54. CUTOVER=/run/.wan-cutover
  55.  
  56. ASTCH=/tmp/_astch_ifl
  57. DR=/usr/local/sbin/devregs
  58. PWM2SET="/usr/local/sbin/imx6pwm -q 2"
  59.  
  60. LOGF=/var/log/isp-failover
  61. TMPF=/tmp/ifl-$$
  62.  
  63. shopt -s expand_aliases
  64. alias DT='date +"%a %Y-%m-%d %H:%M:%S"'
  65.  
  66. PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/sbin:/usr/local/bin
  67.  
  68. # local status indicator LED
  69. function led2 () {
  70. $DR IOMUXC_SW_MUX_CTL_PAD_DISP0_DAT9 2 >/dev/null 2>&1
  71. $PWM2SET $1
  72. }
  73.  
  74. function cleanexit () {
  75. lockfile-remove $LOCKF
  76. rm -f $TMPF
  77. exit 0
  78. }
  79.  
  80. function rxpkts () {
  81. ifconfig $1 |grep " RX packets:" |cut -d: -f2|cut -d' ' -f1
  82. }
  83.  
  84. function senin_defroute () {
  85. if [[ ! -f $NO_SENIN ]]; then
  86. if fping -B1 -b12 senin; then
  87. if [[ "$1" = "$WAN2" ]]; then
  88. ssh senin defroute tmo
  89. else
  90. ssh senin defroute quasit
  91. fi
  92. fi
  93. fi
  94. }
  95.  
  96. function revert_to_preferred_links () {
  97. echo "`DT` : revert to preferred links" | tee -a $LOGF
  98.  
  99. for t in $PREFER_WAN1_TABLES; do
  100. R=`ip route show table $t | grep ^default`
  101. if echo $R | grep -q $WAN1; then
  102. echo "`DT` : skipping table $t - already on $WAN1"
  103. else
  104. C="ip route delete table $t $R"
  105. $C 2>/dev/null
  106. $C 2>/dev/null
  107. echo "`DT` : $C"
  108. C="ip route add table $t default via $WAN1_GW dev $WAN1"
  109. $C
  110. echo "`DT` : $C"
  111. fi
  112. done
  113.  
  114. voip -$PREFER_VOIP
  115.  
  116. for t in $PREFER_WAN2_TABLES; do
  117. R=`ip route show table $t | grep ^default`
  118. if echo $R | grep -q $WAN2; then
  119. echo "`DT` : skipping table $t - already on $WAN2"
  120. else
  121. C="ip route delete table $t $R"
  122. $C 2>/dev/null
  123. $C 2>/dev/null
  124. echo "`DT` : $C"
  125. C="ip route add table $t default via $WAN2_GW dev $WAN2"
  126. $C
  127. echo "`DT` : $C"
  128. fi
  129. done
  130.  
  131. for t in $PREFER_WAN1_TUNNELS; do
  132. if ! $t |grep -q $WAN1; then
  133. echo "`DT` : revert $t to $WAN1"
  134. $t $WAN1
  135. else
  136. echo "skipping $t"
  137. fi
  138. done
  139.  
  140. for t in $PREFER_WAN2_TUNNELS; do
  141. if ! $t |grep -q $WAN2; then
  142. echo "`DT` : revert $t to $WAN2"
  143. $t $WAN2
  144. else
  145. echo "skipping $t"
  146. fi
  147. done
  148.  
  149. senin_defroute $PREFER_SENIN
  150. }
  151.  
  152. function cutover_wan_to_wan () {
  153. echo "`DT` : cutover defaults: $1 to $2" | tee -a $LOGF
  154. for t in $FAILOVER_TABLES; do
  155. R=`ip route show table $t | grep ^default`
  156. if echo $R | grep -q $1; then
  157. C="ip route delete table $t $R"
  158. $C
  159. $C 2>/dev/null
  160. echo "`DT` : $C"
  161. if [[ "$2" == "$WAN1" ]]; then GW=$WAN1_GW; else GW=$WAN2_GW; fi
  162. C="ip route add table $t default via $GW dev $2"
  163. $C
  164. echo "`DT` : $C"
  165. elif [[ "$R" = "" ]]; then
  166. echo "`DT` : table $t - no default"
  167. if [[ "$2" == "$WAN1" ]]; then GW=$WAN1_GW; else GW=$WAN2_GW; fi
  168. C="ip route add table $t default via $GW dev $2"
  169. $C
  170. echo "`DT` : $C"
  171. else
  172. echo "`DT` : skipping table $t - no default to $1"
  173. fi
  174. done
  175.  
  176. voip -$2
  177.  
  178. echo "`DT` : cutover tunnels: $1 to $2" | tee -a $LOGF
  179. for t in $FAILOVER_TUNNELS; do
  180. if $t |grep -q $1; then
  181. echo "`DT` : $t to $2"
  182. $t $2
  183. elif ! $t |grep -q UGH; then
  184. echo "`DT` : $t no route - to $2"
  185. $t $2
  186. else
  187. echo "skipping $t"
  188. fi
  189. done
  190.  
  191. senin_defroute $2
  192. }
  193.  
  194.  
  195. function cutover_cellbk_to_wan() {
  196. led2 30
  197. RF=/run/.wan${1}-back
  198.  
  199. if [[ ! -f $RF ]]; then
  200. echo 0 >$RF
  201. echo "`DT` : WAN$1 restored? count 0" | tee -a $LOGF
  202. cleanexit
  203. fi
  204.  
  205. TIME_BACK=`<$RF`
  206. if [[ $TIME_BACK -lt $CELLBK_REVERT_DELAY ]]; then
  207. TIME_BACK=$[ $TIME_BACK + 1]
  208. echo "$TIME_BACK" >$RF
  209. echo "`DT` : WAN$1 restored? count $TIME_BACK" | tee -a $LOGF
  210. cleanexit
  211. fi
  212.  
  213. asterisk -rx "core show channels" |grep SIP\/ >$ASTCH
  214. if [[ -s $ASTCH ]]; then
  215. TIME_BACK=$[ $TIME_BACK + 1]
  216. echo "$TIME_BACK" >$RF
  217. echo "`DT` : WAN$1 restored - count $TIME_BACK - waiting on phone calls" | tee -a $LOGF
  218. cleanexit
  219. fi
  220.  
  221. rm -rf $RF
  222. echo "`DT` : cutover defaults: Cellular-Backup to $1" | tee -a $LOGF
  223. for t in $FAILOVER_TABLES; do
  224. R=`ip route show table $t | grep ^default`
  225. if echo $R | grep -q ' lte'; then # don't repoint tunneled defaults
  226. C="ip route delete table $t $R"
  227. $C
  228. $C 2>/dev/null
  229. echo "`DT` : $C"
  230. if [[ "$1" == "$WAN1" ]]; then GW=$WAN1_GW; else GW=$WAN2_GW; fi
  231. C="ip route add table $t default via $GW dev $1"
  232. $C
  233. echo "`DT` : $C"
  234. else
  235. echo "`DT` : skipping table $t - no cell-backup default"
  236. fi
  237. done
  238.  
  239. voip -$1
  240.  
  241. echo "`DT` : cutover tunnels: Cellular-Backup to $1" | tee -a $LOGF
  242. for t in $FAILOVER_TUNNELS; do
  243. if $t |grep -q " lte"; then
  244. echo "`DT` : $t to $1"
  245. $t $1
  246. else
  247. echo "skipping $t"
  248. fi
  249. done
  250.  
  251. rm -f $ON_CELLBK
  252.  
  253. # stop flashing cell-backup indicator LEDs (local & remote)
  254. $PWM2SET 0
  255. led 1 0
  256.  
  257. senin_defroute $2
  258. }
  259.  
  260.  
  261. function cutover_to_cellbk () {
  262. LL=$1
  263.  
  264. # start outage-warning LEDs flashing
  265. led2 16
  266. led 1 15
  267.  
  268. TRIED=0
  269. LINKUP=0
  270. while [[ $LINKUP == 0 && $TRIED -lt 3 ]]; do
  271. if [[ $TRIES -ge 3 ]]; then
  272. echo "`DT` : NO USABLE BACKUP LINK" | tee -a $LOGF
  273. cleanexit # give up for now; hope a WAN link returns
  274. fi
  275. if ! ip link show |grep -q lte$LL; then
  276. echo "`DT` : bringing up cellular link $LL" | tee -a $LOGF
  277. tether$LL up
  278. else
  279. echo "`DT` : cellular link $LL already up" | tee -a $LOGF
  280. fi
  281.  
  282. if ! ip link show |grep -q lte$LL; then
  283. TRIED="$[ $TRIED + 1 ]"
  284. NEXT_CELL="$[ $LL + 1 ]"
  285. if [[ $NEXT_CELL -gt 3 || $NEXT_CELL -lt 2 ]]; then NEXT_CELL=1; fi
  286. echo "`DT` : CELL-${LL} UNUSABLE, TRYING CELL-${NEXT_CELL}" | tee -a $LOGF
  287. LL=$NEXT_CELL
  288. else
  289. LINKUP=1
  290.  
  291. echo "`DT` : cutover tunnels to CELL-$LL" | tee -a $LOGF
  292. for t in $FAILOVER_TUNNELS; do
  293. if $t |grep -qe $WAN1 -e $WAN2; then
  294. echo "`DT` : $t to lte$LL"
  295. $t lte$LL
  296. else
  297. echo "skipping $t"
  298. fi
  299. done
  300.  
  301. voip -lte$LL
  302. # voip $CELL_TUNNEL
  303.  
  304. echo "`DT` : cutover defaults to cell-tunnel $CELL_TUNNEL" | tee -a $LOGF
  305. for t in $CELLFAIL_TABLES; do
  306. R=`ip route show table $t | grep ^default`
  307. if echo $R | grep -vqe " lte" -e " tun"; then
  308. C="ip route delete table $t $R"
  309. $C
  310. $C 2>/dev/null
  311. echo "`DT` : $C" | tee -a $LOGF
  312. C="ip route add table $t default dev $CELL_TUNNEL"
  313. $C
  314. echo "`DT` : $C" | tee -a $LOGF
  315. else
  316. echo "`DT` : skipping table $t" | tee -a $LOGF
  317. fi
  318. done
  319.  
  320. echo $LL >$ON_CELLBK
  321.  
  322. if [[ $LL -ge 2 ]]; then
  323. # flash LEDs more rapidly while on expensive/limited backup path
  324. $PWM2SET 31881 30 1
  325. led 1 1 188
  326. else
  327. # once/second LED flash while on cell-backup #1
  328. led2 1
  329. led 1 1
  330. fi
  331.  
  332. senin_defroute quasit
  333.  
  334. cleanexit # exit until next invokation after bringing up cell link
  335.  
  336. fi
  337.  
  338. done
  339. }
  340.  
  341.  
  342. function test_run () {
  343. WAN1_UP=1
  344. WAN2_UP=1
  345. WAN1_WAS_DOWN=1
  346. WAN2_WAS_DOWN=1
  347.  
  348. if ! fping -B1 -ub12 -I$WAN1 -S$WAN1IP $TEST_LIST >$TMPF 2>/dev/null; then
  349. UNREACH=`cat $TMPF |xargs`
  350. if [[ ! -f $WAN1_DOWN ]]; then # avoid repetitive messages during outage
  351. echo "`DT` : down via $WAN1: $UNREACH" | tee -a $LOGF
  352. WAN1_WAS_DOWN=0
  353. fi
  354. if [[ `cat $TMPF | wc -l` -ge $TRIGGER ]]; then
  355. WAN1_UP=0
  356. echo "$UNREACH" > $WAN1_DOWN
  357. else
  358. led2 32
  359. rm -f $WAN1_DOWN
  360. fi
  361. else
  362. rm -f $WAN1_DOWN
  363. fi
  364.  
  365. if ! ifconfig $WAN2 >/dev/null 2>&1; then
  366. WAN2_UP=0
  367. if [[ ! -f $WAN2_DOWN ]]; then # avoid repetitive messages during outage
  368. echo "`DT` : $WAN2 interface missing!" | tee -a $LOGF
  369. WAN2_WAS_DOWN=0
  370. fi
  371. echo "$WAN2 interface missing!" > $WAN2_DOWN
  372. elif ! fping -B1 -ub12 -I$WAN2 -S$WAN2IP $TEST_LIST >$TMPF 2>/dev/null; then
  373. UNREACH=`cat $TMPF |xargs`
  374. if [[ ! -f $WAN2_DOWN ]]; then # avoid repetitive messages during outage
  375. echo "`DT` : down via $WAN2: $UNREACH" | tee -a $LOGF
  376. WAN2_WAS_DOWN=0
  377. fi
  378. if [[ `cat $TMPF | wc -l` -ge $TRIGGER ]]; then
  379. WAN2_UP=0
  380. echo "$UNREACH" > $WAN2_DOWN
  381. else
  382. led2 32
  383. rm -f $WAN2_DOWN
  384. fi
  385. else
  386. rm -f $WAN2_DOWN
  387. fi
  388.  
  389. if [[ -f $ON_CELLBK ]]; then
  390. if [[ $WAN1_UP == 1 ]]; then
  391. touch /run/.cutover-cell-to-wan1
  392. touch $CUTOVER
  393. cutover_cellbk_to_wan $WAN1
  394. elif [[ $WAN2_UP == 1 ]]; then
  395. touch /run/.cutover-cell-to-wan2
  396. touch $CUTOVER
  397. cutover_cellbk_to_wan $WAN2
  398. else
  399. rm -f $NORMAL $BACK_TO_NORMAL
  400. CELL_INTF=lte`cat $ON_CELLBK`
  401. if ! fping -B1 -ub12 -I$CELL_INTF $CELLBK_TEST >$TMPF 2>/dev/null; then
  402. UNREACH=`cat $TMPF |xargs`
  403. echo "`DT` : down via $CELLBK_INTF: $UNREACH" | tee -a $LOGF
  404. if [[ `cat $TMPF | wc -l` -ge $CELLBK_TRIGGER ]]; then
  405. NEXT_CELL="$[ $CELL_INTF + 1 ]"
  406. if [[ $NEXT_CELL -gt 3 || $NEXT_CELL -lt 2 ]]; then NEXT_CELL=1; fi
  407. echo "`DT` : CELL-${CELL_INTF} UNUSABLE, TRYING CELL-${NEXT_CELL}" | tee -a $LOGF
  408. cutover_to_cellbk $NEXT_CELL
  409. fi
  410. else
  411. # only test once per cron invokation while on cell-backup with good link
  412. cleanexit
  413. fi
  414. fi
  415. else
  416. if [[ $WAN1_UP == 0 && $WAN2_UP == 1 && $WAN1_WAS_DOWN == 0 ]]; then
  417. touch /run/.cutover-wan1-to-2
  418. touch $CUTOVER
  419. rm -f $NORMAL $BACK_TO_NORMAL
  420. cutover_wan_to_wan $WAN1 $WAN2
  421. elif [[ $WAN2_UP == 0 && $WAN1_UP == 1 && $WAN2_WAS_DOWN == 0 ]]; then
  422. touch /run/.cutover-wan2-to-1
  423. touch $CUTOVER
  424. rm -f $NORMAL $BACK_TO_NORMAL
  425. cutover_wan_to_wan $WAN2 $WAN1
  426. elif [[ $WAN1_UP == 0 && $WAN2_UP == 0 ]]; then
  427. touch /run/.cutover-to-cell
  428. touch $CUTOVER
  429. rm -f $NORMAL $BACK_TO_NORMAL
  430. cutover_to_cellbk 1
  431. else
  432. if [[ $WAN1_UP == 1 && $WAN2_UP == 1 ]]; then
  433. $PWM2SET 0
  434. led 1 0
  435. touch $NORMAL
  436. if [[ ! -f $BACK_TO_NORMAL ]]; then
  437. touch -d "+$REVERT_NORMAL_MIN minutes" $BACK_TO_NORMAL
  438. elif [[ -f $CUTOVER && $NORMAL -nt $BACK_TO_NORMAL && ! -f $NO_REVERT ]]; then
  439. asterisk -rx "core show channels" |grep SIP\/ >$ASTCH
  440. if [[ ! -s $ASTCH ]]; then
  441. W1_RX_A=`rxpkts $WAN1`
  442. W2_RX_A=`rxpkts $WAN2`
  443. sleep $TRAFFIC_WINDOW
  444. W1_RX_Z=`rxpkts $WAN1`
  445. W2_RX_Z=`rxpkts $WAN2`
  446. W1_RX_PKTS=$[ $W1_RX_Z - $W1_RX_A ];
  447. W2_RX_PKTS=$[ $W2_RX_Z - $W2_RX_A ];
  448. PPS_THR=$[ $REVERT_NORMAL_PPS * $TRAFFIC_WINDOW ];
  449. if [[ $W1_RX_PKTS -lt $PPS_THR && $W2_RX_PKTS -lt $PPS_THR ]]; then
  450. echo "W1_RX_PKTS=$W1_RX_PKTS & W2_RX_PKTS=$W2_RX_PKTS < PPS_THR = $PPS_THR"
  451. revert_to_preferred_links
  452. rm -f $CUTOVER
  453. else
  454. echo "waiting for low-traffic period. W1_RX_PKTS=$W1_RX_PKTS / W2_RX_PKTS=$W2_RX_PKTS "
  455. fi
  456. else
  457. echo "waiting on phone calls to revert"
  458. fi
  459. fi
  460. fi
  461. fi
  462. fi
  463. }
  464.  
  465. # unused but retained for reference
  466. function list_client_tunnels() {
  467. cd $OVPN
  468. for v in `ps auxww |grep -v grep |grep "openvpn --config" |\
  469. awk '{print $13}' |sed 's/.*\///' | xargs grep -l ^tls-client`; do
  470. t=`grep "^dev " $v |head -n 1 |awk '{print $2}'`
  471. r=`grep ^remote $v |head -n 1|awk '{print $2}'`
  472. echo $v $r $t
  473. done
  474. }
  475.  
  476.  
  477. ## MAIN ENTRY POINT
  478.  
  479. #if [[ -f /run/.fastprobe ]]; then
  480. # REPS=$FAST_REPS
  481. # WAIT=$FAST_WAIT
  482. #fi
  483.  
  484. sleep 2
  485.  
  486. WAN1IP=`ip addr show dev $WAN1 |grep 'inet ' |grep -v 192.168.100 |head -n 1 | awk '{print $2}' |cut -d/ -f1`
  487. if [[ "$WAN1IP" = "" ]]; then WAN1IP=0.0.0.0; fi
  488.  
  489. WAN2IP=`ip addr show dev $WAN2 |grep 'inet ' |head -n 1 | awk '{print $2}' |cut -d/ -f1`
  490. if [[ "$WAN2IP" = "" ]]; then WAN1IP=0.0.0.0; fi
  491.  
  492. RUN=1
  493. while true; do
  494. test_run
  495. RUN=$[ $RUN + 1 ];
  496. if [[ $RUN -gt $REPS ]]; then
  497. # date
  498. cleanexit
  499. fi
  500. sleep $WAIT
  501. done
  502.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement