Advertisement
Guest User

replicate

a guest
Apr 16th, 2015
768
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 67.41 KB | None | 0 0
  1. #!/bin/sh
  2.  
  3. ##
  4. ## Initial and continous ZFS filesystems replication
  5. ##
  6.  
  7. PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
  8.  
  9. if [ ! `whoami` = "root" ]; then
  10. echo "You need to be root."
  11. exit
  12. fi
  13.  
  14.  
  15. l_old="@local_replicate.base"
  16. r_old="@remote_replicate.base"
  17. l_new="@local_replicate.delta"
  18. r_new="@remote_replicate.delta"
  19. log="/var/log/replicate.log"
  20. cmd=$(mktemp /tmp/replicate.job.XXXXXX)
  21. ERR=$(mktemp /tmp/replicate.err.XXXXXX)
  22. pid="/var/run/replicate.pid"
  23. path="/usr/local/etc/replicatejobs/"
  24. jobs=`find $path -type f ! -iname sample_* ! -iname noauto_* | sort -d`
  25.  
  26. ##
  27. ## Mail settings
  28. ##
  29. subject="A replication error has occurred!"
  30. address="storageadm@slu.se"
  31. mail=$(mktemp /tmp/replicate.mail.XXXXXX)
  32.  
  33. usage()
  34. {
  35. echo ""
  36. echo "Usage: `basename $0` [Options] [Flags] [Job]"
  37. echo ""
  38. echo "Options: -c|-h"
  39. echo "-c: Clean."
  40. echo " removes replicated filesystems and snapshot(s)."
  41. echo "-h: Usage."
  42. echo ""
  43. echo "Flags: auto"
  44. echo "auto: Clean auto."
  45. echo " removes all scheduled snapshot(s)."
  46. echo ""
  47. echo "Examples: `basename $0` | `basename $0` jobname"
  48. echo " `basename $0` -c | `basename $0` -c jobname | `basename $0` -c auto"
  49. echo ""
  50. }
  51.  
  52. l_base()
  53. {
  54. ##
  55. ## Makes local baseline replication
  56. ##
  57.  
  58. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  59. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  60. echo "" >> $log
  61. rm ${pid}
  62. exit 1
  63. elif zpool status $lp | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  64. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  65. echo "" >> $log
  66. rm ${pid}
  67. exit 1
  68. fi
  69.  
  70. if [ "$r" = "yes" ]
  71. then
  72. SNAPSHOT="zfs snapshot -r"
  73. SEND="zfs send -R"
  74. else
  75. SNAPSHOT="zfs snapshot"
  76. SEND="zfs send -p"
  77. fi
  78.  
  79. tfs=$(echo $lfs | cut -f 2-512 -d / | sed "s/^/$lp\//")
  80.  
  81. echo "echo \"\`date\`: Beginning local baseline replication sequence on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  82.  
  83. # Take initial snapshot(s):
  84. echo "sudo ${SNAPSHOT} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  85. echo errorcheck >> ${cmd}
  86. echo "echo \"\`date\`: Initial snapshot(s) created\" >> $log" >> ${cmd}
  87.  
  88. # Replicate data:
  89. echo "sudo ${SEND} $lfs$l_old 2>> ${ERR} | sudo zfs recv -du $lp 2>> ${ERR}" >> ${cmd}
  90. echo errorcheck >> ${cmd}
  91. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  92.  
  93. # Delete unwanted, scheduled source snapshots from target:
  94. if [ "$r" = "yes" ]; then
  95. cat >> ${cmd} << EOF
  96. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  97. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$l_old|$l_new"); do
  98. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  99. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  100. errorcheck
  101. fi
  102. done
  103. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  104. fi
  105. EOF
  106. else
  107. cat >> ${cmd} << EOF
  108. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  109. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new"); do
  110. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  111. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  112. errorcheck
  113. fi
  114. done
  115. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  116. fi
  117. EOF
  118. fi
  119.  
  120. echo "echo \"\`date\`: Local baseline replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  121. echo "echo \"\" >> $log" >> ${cmd}
  122. echo "exit 0" >> ${cmd}
  123. echo "" >> ${cmd}
  124. }
  125.  
  126. r_base()
  127. {
  128. ##
  129. ## Makes remote baseline replication
  130. ##
  131.  
  132. SSH="ssh"
  133.  
  134. if [ "${compress}" = "yes" ]; then
  135. SSH="ssh -C"
  136. fi
  137.  
  138. if [ ! -z ${port} ]; then
  139. SSH="${SSH} -p ${port}"
  140. else
  141. SSH="${SSH} -p 22"
  142. fi
  143.  
  144. if [ `ping -c 1 -W 1000 $rh | grep -oe "[0-9] packets received" | awk '{print $1}'` -eq "0" ]; then
  145. echo "`date`: Remote baseline replication sequence aborted on \"$lfs\"! No response from \"$rh\"." >> $log
  146. echo "" >> $log
  147. rm ${pid}
  148. exit 1
  149. elif zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  150. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  151. echo "" >> $log
  152. rm ${pid}
  153. exit 1
  154. elif su replicator -c "${SSH} $rh zpool status $rp | grep \"scan:\" | egrep -qo '(scrub in progress|resilver in progress)'"; then
  155. echo "`date`: A Scrub or Resilver is currently in progress on remote pool, aborting." >> $log
  156. echo "" >> $log
  157. rm ${pid}
  158. exit 1
  159. fi
  160.  
  161. if [ "$r" = "yes" ]
  162. then
  163. SNAPSHOT="zfs snapshot -r"
  164. SEND="zfs send -R"
  165. else
  166. SNAPSHOT="zfs snapshot"
  167. SEND="zfs send -p"
  168. fi
  169.  
  170. tfs=$(echo $lfs | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//")
  171.  
  172. echo "echo \"\`date\`: Beginning remote baseline replication sequence on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  173.  
  174. # Take initial snapshot(s):
  175. echo "sudo ${SNAPSHOT} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  176. echo errorcheck >> ${cmd}
  177. echo "echo \"\`date\`: Initial snapshot(s) created\" >> $log" >> ${cmd}
  178.  
  179. # Replicate data:
  180. echo "sudo ${SEND} $lfs$r_old 2>> ${ERR} | ${SSH} $rh sudo zfs recv -du $rp/$lh 2>> ${ERR}" >> ${cmd}
  181. echo errorcheck >> ${cmd}
  182. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  183.  
  184. # Clean unwanted, scheduled source snapshots from target:
  185. if [ "$r" = "yes" ]; then
  186. cat >> ${cmd} << EOF
  187. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  188. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$r_old|$r_new"); do
  189. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  190. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  191. errorcheck
  192. fi
  193. done
  194. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  195. fi
  196. EOF
  197. else
  198. cat >> ${cmd} << EOF
  199. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  200. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new"); do
  201. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  202. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  203. errorcheck
  204. fi
  205. done
  206. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  207. fi
  208. EOF
  209. fi
  210.  
  211. echo "echo \"\`date\`: Remote baseline replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  212. echo "echo \"\" >> $log" >> ${cmd}
  213. echo "exit 0" >> ${cmd}
  214. echo "" >> ${cmd}
  215. }
  216.  
  217. l_inc()
  218. {
  219. ##
  220. ## Makes local incremental replication
  221. ##
  222.  
  223. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  224. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  225. echo "" >> $log
  226. rm ${pid}
  227. exit 1
  228. elif zpool status $lp | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  229. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  230. echo "" >> $log
  231. rm ${pid}
  232. exit 1
  233. fi
  234.  
  235. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$lp\//"`
  236.  
  237. echo "`date`: Beginning local incremental replication sequence on \"$lfs\"" >> $log
  238.  
  239. # Take new source snapshot(s):
  240.  
  241. if [ "$r" = "yes" ]; then
  242. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  243. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  244. DESTBASE=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)
  245. DESTDELTA=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta)
  246. SNAPSHOT="zfs snapshot -r"
  247. SEND="zfs send -R"
  248. DESTROY="zfs destroy -r"
  249. RENAME="zfs rename -r"
  250. else
  251. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  252. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  253. DESTBASE=$(zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}')
  254. DESTDELTA=$(zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}')
  255. SNAPSHOT="zfs snapshot"
  256. SEND="zfs send -p"
  257. DESTROY="zfs destroy"
  258. RENAME="zfs rename"
  259. fi
  260.  
  261. rebaseline()
  262. {
  263. echo "echo \"\`date\`: No appropriate correctional steps where found, have to rebaseline\" >> $log" >> ${cmd}
  264. if [ ${SOURCEBASE} -gt "0" ]; then
  265. echo "sudo ${DESTROY} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  266. echo errorcheck >> ${cmd}
  267. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  268. fi
  269. if [ ${SOURCEDELTA} -gt "0" ]; then
  270. echo "sudo ${DESTROY} $lfs$l_new 2>> ${ERR}" >> ${cmd}
  271. echo errorcheck >> ${cmd}
  272. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  273. fi
  274. if [ ${DESTBASE} -gt "0" ]; then
  275. echo "sudo ${DESTROY} $tfs$l_old 2>> ${ERR}" >> ${cmd}
  276. echo errorcheck >> ${cmd}
  277. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  278. fi
  279. if [ ${DESTDELTA} -gt "0" ]; then
  280. echo "sudo ${DESTROY} $tfs$l_new 2>> ${ERR}" >> ${cmd}
  281. echo errorcheck >> ${cmd}
  282. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  283. fi
  284. if [ "$r" != "yes" ]; then
  285. cat >> ${cmd} << EOF
  286. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  287. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new"); do
  288. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  289. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  290. errorcheck
  291. fi
  292. done
  293. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  294. fi
  295. EOF
  296. fi
  297. if [ $(zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}') -gt "0" ]; then
  298. echo "sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  299. echo errorcheck >> ${cmd}
  300. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  301. fi
  302. echo "sudo ${SNAPSHOT} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  303. echo errorcheck >> ${cmd}
  304. echo "echo \"\`date\`: Base snapshot(s) created\" >> $log" >> ${cmd}
  305. echo "sudo ${SEND} $lfs$l_old 2>> ${ERR} | sudo zfs recv -du $lp 2>> ${ERR}" >> ${cmd}
  306. echo errorcheck >> ${cmd}
  307. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  308. echo "echo \"\`date\`: Rebaseline complete\" >> $log" >> ${cmd}
  309. echo "sudo ${SNAPSHOT} $lfs$l_new 2>> ${ERR}" >> ${cmd}
  310. echo errorcheck >> ${cmd}
  311. echo "echo \"\`date\`: Delta snapshot(s) created\" >> $log" >> ${cmd}
  312. }
  313.  
  314. dryrun()
  315. {
  316. DRYRUNOUT=$(mktemp /tmp/replicate.dryrun.XXXXXX)
  317.  
  318. if [ "$r" = "yes" ]
  319. then
  320. DODRYRUN=`zfs send -vRi $lfs$l_old $lfs$l_new 2> ${DRYRUNOUT} | zfs recv -vdn $lp 2>> ${DRYRUNOUT}`
  321. else
  322. DODRYRUN=`zfs send -vi $lfs$l_old $lfs$l_new 2> ${DRYRUNOUT} | zfs recv -vdn $lp 2>> ${DRYRUNOUT}`
  323. fi
  324.  
  325. DRYRUNOK=`egrep '(could not send|cannot receive)' ${DRYRUNOUT} | wc -l | sed 's/^[ \t]*//'`
  326. rm ${DRYRUNOUT}; echo ${DRYRUNOK}
  327. }
  328.  
  329. if [ ${SOURCEBASE} -gt "0" ]
  330. then
  331. if [ ${SOURCEDELTA} -gt "0" ]
  332. then
  333. if [ ${DESTBASE} -gt "0" ]
  334. then
  335. if [ ${DESTDELTA} -gt "0" ]
  336. then
  337. if [ ${DESTDELTA} = ${DESTBASE} ]
  338. then
  339. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  340. ${DESTROY} $tfs$l_old 2>> ${ERR}
  341. errorcheck
  342. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  343. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  344. errorcheck
  345. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  346. ${DESTROY} $lfs$l_old 2>> ${ERR}
  347. errorcheck
  348. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  349. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  350. errorcheck
  351. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  352. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  353. errorcheck
  354. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  355. if [ `dryrun` -gt "0" ]
  356. then
  357. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  358. rebaseline
  359. else
  360. echo "`date`: Dry run successful, OK to resend" >> $log
  361. fi
  362. else
  363. rebaseline
  364. fi
  365. else
  366. if [ `dryrun` -gt "0" ]
  367. then
  368. echo "`date`: Dry run unsuccessful" >> $log
  369. echo "`date`: Probably interrupted while destroying source .base(s)" >> $log
  370. ${DESTROY} $lfs$l_old 2>> ${ERR}
  371. errorcheck
  372. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  373. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  374. errorcheck
  375. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  376. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  377. errorcheck
  378. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  379. if [ `dryrun` -gt "0" ]
  380. then
  381. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  382. rebaseline
  383. else
  384. echo "`date`: Dry run successful, OK to resend" >> $log
  385. fi
  386. else
  387. echo "`date`: Dry run successful" >> $log
  388. echo "`date`: Probably interrupted while transferring, resending" >> $log
  389. fi
  390. fi
  391. else
  392. if [ ${DESTDELTA} -gt "0" ]
  393. then
  394. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  395. then
  396. echo "`date`: Probably interrupted while renaming target .delta to .base" >> $log
  397. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  398. errorcheck
  399. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  400. ${DESTROY} $lfs$l_old 2>> ${ERR}
  401. errorcheck
  402. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  403. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  404. errorcheck
  405. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  406. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  407. errorcheck
  408. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  409. if [ `dryrun` -gt "0" ]
  410. then
  411. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  412. rebaseline
  413. else
  414. echo "`date`: Dry run successful, OK to resend" >> $log
  415. fi
  416. else
  417. rebaseline
  418. fi
  419. else
  420. rebaseline
  421. fi
  422. fi
  423. else
  424. if [ ${DESTBASE} -gt "0" ]
  425. then
  426. if [ ${DESTDELTA} -gt "0" ]
  427. then
  428. ${DESTROY} $tfs$l_new 2>> ${ERR}
  429. errorcheck
  430. echo "`date`: Target .delta snapshot(s) found and destroyed" >> $log
  431. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  432. errorcheck
  433. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  434. else
  435. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  436. errorcheck
  437. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  438. if [ `dryrun` -gt "0" ]
  439. then
  440. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  441. rebaseline
  442. else
  443. echo "`date`: Dry run successful, OK to resend" >> $log
  444. fi
  445. fi
  446. else
  447. if [ ${DESTDELTA} -gt "0" ]
  448. then
  449. if [ ${SOURCEBASE} = ${DESTDELTA} ]
  450. then
  451. echo "`date`: No target .base found, but source .base matches target .delta. Renaming target .delta to .base" >> $log
  452. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  453. errorcheck
  454. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  455. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  456. errorcheck
  457. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  458. if [ `dryrun` -gt "0" ]
  459. then
  460. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  461. rebaseline
  462. else
  463. echo "`date`: Dry run successful, OK to resend" >> $log
  464. fi
  465. else
  466. rebaseline
  467. fi
  468. else
  469. rebaseline
  470. fi
  471. fi
  472. fi
  473. else
  474. if [ ${SOURCEDELTA} -gt "0" ]
  475. then
  476. if [ ${DESTBASE} -gt "0" ]
  477. then
  478. if [ ${DESTDELTA} -gt "0" ]
  479. then
  480. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  481. then
  482. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  483. ${DESTROY} $tfs$l_old 2>> ${ERR}
  484. errorcheck
  485. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  486. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  487. errorcheck
  488. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  489. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  490. errorcheck
  491. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  492. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  493. errorcheck
  494. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  495. if [ `dryrun` -gt "0" ]
  496. then
  497. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  498. rebaseline
  499. else
  500. echo "`date`: Dry run successful, OK to resend" >> $log
  501. fi
  502. else
  503. rebaseline
  504. fi
  505. else
  506. if [ ${SOURCEDELTA} = ${DESTBASE} ]
  507. then
  508. echo "`date`: Probably interrupted while renaming source .delta to .base" >> $log
  509. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  510. errorcheck
  511. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  512. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  513. errorcheck
  514. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  515. if [ `dryrun` -gt "0" ]
  516. then
  517. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  518. rebaseline
  519. else
  520. echo "`date`: Dry run successful, OK to resend" >> $log
  521. fi
  522. else
  523. rebaseline
  524. fi
  525. fi
  526. else
  527. if [ ${DESTDELTA} -gt "0" ]
  528. then
  529. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  530. then
  531. echo "`date`: No source or target .base, but source and target .delta matches. Can try to rename them to .base and test a resend"
  532. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  533. errorcheck
  534. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  535. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  536. errorcheck
  537. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  538. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  539. errorcheck
  540. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  541. if [ `dryrun` -gt "0" ]
  542. then
  543. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  544. rebaseline
  545. else
  546. echo "`date`: Dry run successful, OK to resend" >> $log
  547. fi
  548. else
  549. rebaseline
  550. fi
  551. else
  552. rebaseline
  553. fi
  554. fi
  555. else
  556. if [ ${DESTBASE} -gt "0" ]
  557. then
  558. if [ ${DESTDELTA} -gt "0" ]
  559. then
  560. rebaseline
  561. else
  562. rebaseline
  563. fi
  564. else
  565. if [ ${DESTDELTA} -gt "0" ]
  566. then
  567. rebaseline
  568. else
  569. rebaseline
  570. fi
  571. fi
  572. fi
  573. fi
  574.  
  575. # Replicate data:
  576. echo "sudo ${SEND} -i $lfs$l_old $lfs$l_new 2>> ${ERR} | sudo zfs recv -du $lp 2>> ${ERR}" >> ${cmd}
  577. echo errorcheck >> ${cmd}
  578. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  579.  
  580. # Destroy target .base snapshot(s):
  581. if [ "$r" = "yes" ]; then
  582. cat >> ${cmd} << EOF
  583. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)" -gt "0" ]; then
  584. sudo ${DESTROY} $tfs$l_old 2>> ${ERR}
  585. errorcheck
  586. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  587. fi
  588. EOF
  589. else
  590. cat >> ${cmd} << EOF
  591. if [ "\$(zfs list -H -t snapshot -o name $tfs$l_old 2>/dev/null | awk 'END{print NR}')" -gt "0" ]; then
  592. sudo ${DESTROY} $tfs$l_old 2>> ${ERR}
  593. errorcheck
  594. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  595. fi
  596. EOF
  597. fi
  598.  
  599. # Rename target .delta snapshot(s) .old:
  600. echo "sudo ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}" >> ${cmd}
  601. echo errorcheck >> ${cmd}
  602. echo "echo \"\`date\`: Target .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  603.  
  604. # Destroy source .base snapshot(s):
  605. echo "sudo ${DESTROY} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  606. echo errorcheck >> ${cmd}
  607. echo "echo \"\`date\`: Source .base snapshot(s) destroyed\" >> $log" >> ${cmd}
  608.  
  609. # Rename source .delta snapshot(s) .base:
  610. echo "sudo ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}" >> ${cmd}
  611. echo errorcheck >> ${cmd}
  612. echo "echo \"\`date\`: Source .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  613.  
  614. echo "echo \"\`date\`: Local incremental replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  615. echo "echo \"\" >> $log" >> ${cmd}
  616. echo "exit 0" >> ${cmd}
  617. echo "" >> ${cmd}
  618. }
  619.  
  620. r_inc()
  621. {
  622. ##
  623. ## Makes remote incremental replication
  624. ##
  625.  
  626. SSH="ssh"
  627.  
  628. if [ "${compress}" = "yes" ]; then
  629. SSH="ssh -C"
  630. fi
  631.  
  632. if [ ! -z ${port} ]; then
  633. SSH="${SSH} -p ${port}"
  634. else
  635. SSH="${SSH} -p 22"
  636. fi
  637.  
  638. if [ `ping -c 1 -W 1000 $rh | grep -oe "[0-9] packets received" | awk '{print $1}'` -eq "0" ]; then
  639. echo "`date`: Remote incremental replication sequence aborted on \"$lfs\"! No response from \"$rh\"." >> $log
  640. echo "" >> $log
  641. rm ${pid}
  642. exit 1
  643. fi
  644. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  645. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  646. echo "" >> $log
  647. rm ${pid}
  648. exit 1
  649. elif su replicator -c "${SSH} $rh zpool status $rp | grep \"scan:\" | egrep -qo '(scrub in progress|resilver in progress)'"; then
  650. echo "`date`: A Scrub or Resilver is currently in progress on remote pool, aborting." >> $log
  651. echo "" >> $log
  652. rm ${pid}
  653. exit 1
  654. fi
  655.  
  656. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//"`
  657.  
  658. echo "`date`: Beginning remote incremental replication sequence on \"$lfs\"" >> $log
  659.  
  660. # Take new source snapshot(s):
  661.  
  662. REPID="`awk -F':' '{print $6}' /etc/passwd | grep replicator`/.ssh/id_rsa"
  663.  
  664. if [ "$r" = "yes" ]; then
  665. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  666. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  667. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base")
  668. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta")
  669. SNAPSHOT="zfs snapshot -r"
  670. SEND="zfs send -R"
  671. DESTROY="zfs destroy -r"
  672. RENAME="zfs rename -r"
  673. else
  674. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  675. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  676. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}'")
  677. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}'")
  678. SNAPSHOT="zfs snapshot"
  679. SEND="zfs send -p"
  680. DESTROY="zfs destroy"
  681. RENAME="zfs rename"
  682. fi
  683.  
  684. rebaseline()
  685. {
  686. echo "echo \"\`date\`: No appropriate correctional steps where found, have to rebaseline\" >> $log" >> ${cmd}
  687. if [ ${SOURCEBASE} -gt "0" ]; then
  688. echo "sudo ${DESTROY} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  689. echo errorcheck >> ${cmd}
  690. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  691. fi
  692. if [ ${SOURCEDELTA} -gt "0" ]; then
  693. echo "sudo ${DESTROY} $lfs$r_new 2>> ${ERR}" >> ${cmd}
  694. echo errorcheck >> ${cmd}
  695. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  696. fi
  697. if [ ${DESTBASE} -gt "0" ]; then
  698. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}" >> ${cmd}
  699. echo errorcheck >> ${cmd}
  700. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  701. fi
  702. if [ ${DESTDELTA} -gt "0" ]; then
  703. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_new 2>> ${ERR}" >> ${cmd}
  704. echo errorcheck >> ${cmd}
  705. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  706. fi
  707. if [ "$r" != "yes" ]; then
  708. cat >> ${cmd} << EOF
  709. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  710. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new"); do
  711. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  712. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  713. errorcheck
  714. fi
  715. done
  716. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  717. fi
  718. EOF
  719. fi
  720. if [ $(su replicator -c "${SSH} $rh zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}'") -gt "0" ]; then
  721. echo "${SSH} $rh sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  722. echo errorcheck >> ${cmd}
  723. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  724. fi
  725. echo "sudo ${SNAPSHOT} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  726. echo errorcheck >> ${cmd}
  727. echo "echo \"\`date\`: Base snapshot(s) created\" >> $log" >> ${cmd}
  728. echo "sudo ${SEND} $lfs$r_old 2>> ${ERR} | ${SSH} $rh sudo zfs recv -du $rp/$lh 2>> ${ERR}" >> ${cmd}
  729. echo errorcheck >> ${cmd}
  730. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  731. echo "echo \"\`date\`: Rebaseline complete\" >> $log" >> ${cmd}
  732. echo "sudo ${SNAPSHOT} $lfs$r_new 2>> ${ERR}" >> ${cmd}
  733. echo errorcheck >> ${cmd}
  734. echo "echo \"\`date\`: Delta snapshot(s) created\" >> $log" >> ${cmd}
  735. }
  736.  
  737. dryrun()
  738. {
  739. DRYRUNOUT=$(mktemp /tmp/replicate.dryrun.XXXXXX)
  740.  
  741. if [ "$r" = "yes" ]
  742. then
  743. DODRYRUN=`zfs send -vRi $lfs$r_old $lfs$r_new 2> ${DRYRUNOUT} | ${SSH} -i ${REPID} replicator@$rh sudo zfs recv -vdn $rp/$lh 2>> ${DRYRUNOUT}`
  744. else
  745. DODRYRUN=`zfs send -vi $lfs$r_old $lfs$r_new 2> ${DRYRUNOUT} | ${SSH} -i ${REPID} replicator@$rh sudo zfs recv -vdn $rp/$lh 2>> ${DRYRUNOUT}`
  746. fi
  747.  
  748. DRYRUNOK=`egrep '(could not send|cannot receive)' ${DRYRUNOUT} | wc -l | sed 's/^[ \t]*//'`
  749. rm ${DRYRUNOUT}; echo ${DRYRUNOK}
  750. }
  751.  
  752. if [ ${SOURCEBASE} -gt "0" ]
  753. then
  754. if [ ${SOURCEDELTA} -gt "0" ]
  755. then
  756. if [ ${DESTBASE} -gt "0" ]
  757. then
  758. if [ ${DESTDELTA} -gt "0" ]
  759. then
  760. if [ ${DESTDELTA} = ${DESTBASE} ]
  761. then
  762. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  763. ${SSH} -i ${REPID} replicator@$rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  764. errorcheck
  765. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  766. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  767. errorcheck
  768. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  769. ${DESTROY} $lfs$r_old 2>> ${ERR}
  770. errorcheck
  771. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  772. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  773. errorcheck
  774. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  775. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  776. errorcheck
  777. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  778. if [ `dryrun` -gt "0" ]
  779. then
  780. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  781. rebaseline
  782. else
  783. echo "`date`: Dry run successful, OK to resend" >> $log
  784. fi
  785. else
  786. rebaseline
  787. fi
  788. else
  789. if [ `dryrun` -gt "0" ]
  790. then
  791. echo "`date`: Dry run unsuccessful" >> $log
  792. echo "`date`: Probably interrupted while destroying source .base(s)" >> $log
  793. ${DESTROY} $lfs$r_old 2>> ${ERR}
  794. errorcheck
  795. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  796. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  797. errorcheck
  798. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  799. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  800. errorcheck
  801. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  802. if [ `dryrun` -gt "0" ]
  803. then
  804. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  805. rebaseline
  806. else
  807. echo "`date`: Dry run successful, OK to resend" >> $log
  808. fi
  809. else
  810. echo "`date`: Dry run successful" >> $log
  811. echo "`date`: Probably interrupted while transferring, resending" >> $log
  812. fi
  813. fi
  814. else
  815. if [ ${DESTDELTA} -gt "0" ]
  816. then
  817. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  818. then
  819. echo "`date`: Probably interrupted while renaming target .delta to .base" >> $log
  820. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  821. errorcheck
  822. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  823. ${DESTROY} $lfs$r_old 2>> ${ERR}
  824. errorcheck
  825. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  826. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  827. errorcheck
  828. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  829. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  830. errorcheck
  831. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  832. if [ `dryrun` -gt "0" ]
  833. then
  834. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  835. rebaseline
  836. else
  837. echo "`date`: Dry run successful, OK to resend" >> $log
  838. fi
  839. else
  840. rebaseline
  841. fi
  842. else
  843. rebaseline
  844. fi
  845. fi
  846. else
  847. if [ ${DESTBASE} -gt "0" ]
  848. then
  849. if [ ${DESTDELTA} -gt "0" ]
  850. then
  851. ${SSH} -i ${REPID} replicator@$rh sudo ${DESTROY} $tfs$r_new 2>> ${ERR}
  852. errorcheck
  853. echo "`date`: Target .delta snapshot(s) found and destroyed" >> $log
  854. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  855. errorcheck
  856. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  857. else
  858. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  859. errorcheck
  860. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  861. if [ `dryrun` -gt "0" ]
  862. then
  863. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  864. rebaseline
  865. else
  866. echo "`date`: Dry run successful, OK to resend" >> $log
  867. fi
  868. fi
  869. else
  870. if [ ${DESTDELTA} -gt "0" ]
  871. then
  872. if [ ${SOURCEBASE} = ${DESTDELTA} ]
  873. then
  874. echo "`date`: No target .base found, but source .base matches target delta. Renaming target .delta to .base" >> $log
  875. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  876. errorcheck
  877. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  878. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  879. errorcheck
  880. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  881. if [ `dryrun` -gt "0" ]
  882. then
  883. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  884. rebaseline
  885. else
  886. echo "`date`: Dry run successful, OK to resend" >> $log
  887. fi
  888. else
  889. rebaseline
  890. fi
  891. else
  892. rebaseline
  893. fi
  894. fi
  895. fi
  896. else
  897. if [ ${SOURCEDELTA} -gt "0" ]
  898. then
  899. if [ ${DESTBASE} -gt "0" ]
  900. then
  901. if [ ${DESTDELTA} -gt "0" ]
  902. then
  903. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  904. then
  905. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  906. ${SSH} -i ${REPID} replicator@$rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  907. errorcheck
  908. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  909. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  910. errorcheck
  911. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  912. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  913. errorcheck
  914. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  915. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  916. errorcheck
  917. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  918. if [ `dryrun` -gt "0" ]
  919. then
  920. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  921. rebaseline
  922. else
  923. echo "`date`: Dry run successful, OK to resend" >> $log
  924. fi
  925. else
  926. rebaseline
  927. fi
  928. else
  929. if [ ${SOURCEDELTA} = ${DESTBASE} ]
  930. then
  931. echo "`date`: Probably interrupted while renaming source .delta to .base" >> $log
  932. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  933. errorcheck
  934. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  935. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  936. errorcheck
  937. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  938. if [ `dryrun` -gt "0" ]
  939. then
  940. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  941. rebaseline
  942. else
  943. echo "`date`: Dry run successful, OK to resend" >> $log
  944. fi
  945. else
  946. rebaseline
  947. fi
  948. fi
  949. else
  950. if [ ${DESTDELTA} -gt "0" ]
  951. then
  952. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  953. then
  954. echo "`date`: No source or target base, but source and target delta matches. Can try to rename them to base and test a resend"
  955. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  956. errorcheck
  957. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  958. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  959. errorcheck
  960. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  961. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  962. errorcheck
  963. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  964. if [ `dryrun` -gt "0" ]
  965. then
  966. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  967. rebaseline
  968. else
  969. echo "`date`: Dry run successful, OK to resend" >> $log
  970. fi
  971. else
  972. rebaseline
  973. fi
  974. else
  975. rebaseline
  976. fi
  977. fi
  978. else
  979. if [ ${DESTBASE} -gt "0" ]
  980. then
  981. if [ ${DESTDELTA} -gt "0" ]
  982. then
  983. rebaseline
  984. else
  985. rebaseline
  986. fi
  987. else
  988. if [ ${DESTDELTA} -gt "0" ]
  989. then
  990. rebaseline
  991. else
  992. rebaseline
  993. fi
  994. fi
  995. fi
  996. fi
  997.  
  998. # Replicate data:
  999. echo "sudo ${SEND} -i $lfs$r_old $lfs$r_new 2>> ${ERR} | ${SSH} $rh sudo zfs recv -du $rp/$lh 2>> ${ERR}" >> ${cmd}
  1000. echo errorcheck >> ${cmd}
  1001. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  1002.  
  1003. # Destroy target .base snapshot(s):
  1004. if [ "$r" = "yes" ]; then
  1005. cat >> ${cmd} << EOF
  1006. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)" -gt "0" ]; then
  1007. ${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  1008. errorcheck
  1009. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  1010. fi
  1011. EOF
  1012. else
  1013. cat >> ${cmd} << EOF
  1014. if [ "\$(${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}')" -gt "0" ]; then
  1015. ${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  1016. errorcheck
  1017. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  1018. fi
  1019. EOF
  1020. fi
  1021.  
  1022. # Rename target .delta snapshot(s) to .base:
  1023. echo "${SSH} $rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}" >> ${cmd}
  1024. echo errorcheck >> ${cmd}
  1025. echo "echo \"\`date\`: Target .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  1026.  
  1027. # Destroy source .base snapshot(s):
  1028. echo "sudo ${DESTROY} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  1029. echo errorcheck >> ${cmd}
  1030. echo "echo \"\`date\`: Source .base snapshot(s) destroyed\" >> $log" >> ${cmd}
  1031.  
  1032. # Rename source .delta snapshot(s) to .base:
  1033. echo "sudo ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}" >> ${cmd}
  1034. echo errorcheck >> ${cmd}
  1035. echo "echo \"\`date\`: Source .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  1036.  
  1037. echo "echo \"\`date\`: Remote incremental replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1038. echo "echo \"\" >> $log" >> ${cmd}
  1039. echo "exit 0" >> ${cmd}
  1040. }
  1041.  
  1042. l_clean()
  1043. {
  1044. ##
  1045. ## Cleans locally replicated file system
  1046. ##
  1047.  
  1048. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  1049. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  1050. echo "" >> $log
  1051. rm ${pid}
  1052. exit 1
  1053. elif zpool status $lp | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  1054. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  1055. echo "" >> $log
  1056. rm ${pid}
  1057. exit 1
  1058. fi
  1059.  
  1060. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$lp\//"`
  1061.  
  1062. if [ "$r" = "yes" ]; then
  1063. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  1064. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  1065. DESTBASE=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)
  1066. DESTDELTA=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta)
  1067. SNAPSHOT="zfs snapshot -r"
  1068. SEND="zfs send -R"
  1069. DESTROY="zfs destroy -r"
  1070. RENAME="zfs rename -r"
  1071. else
  1072. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  1073. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  1074. DESTBASE=$(zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}')
  1075. DESTDELTA=$(zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}')
  1076. SNAPSHOT="zfs snapshot"
  1077. SEND="zfs send -p"
  1078. DESTROY="zfs destroy"
  1079. RENAME="zfs rename"
  1080. fi
  1081.  
  1082. echo "echo \"\`date\`: Beginning local cleaning process on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1083.  
  1084. if [ ${SOURCEBASE} -gt "0" ]; then
  1085. echo "sudo ${DESTROY} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  1086. echo errorcheck >> ${cmd}
  1087. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  1088. fi
  1089. if [ ${SOURCEDELTA} -gt "0" ]; then
  1090. echo "sudo ${DESTROY} $lfs$l_new 2>> ${ERR}" >> ${cmd}
  1091. echo errorcheck >> ${cmd}
  1092. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  1093. fi
  1094. if [ ${DESTBASE} -gt "0" ]; then
  1095. echo "sudo ${DESTROY} $tfs$l_old 2>> ${ERR}" >> ${cmd}
  1096. echo errorcheck >> ${cmd}
  1097. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  1098. fi
  1099. if [ ${DESTDELTA} -gt "0" ]; then
  1100. echo "sudo ${DESTROY} $tfs$l_new 2>> ${ERR}" >> ${cmd}
  1101. echo errorcheck >> ${cmd}
  1102. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  1103. fi
  1104. if [ "$r" != "yes" ]; then
  1105. cat >> ${cmd} << EOF
  1106. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  1107. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new"); do
  1108. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  1109. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  1110. errorcheck
  1111. fi
  1112. done
  1113. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  1114. fi
  1115. EOF
  1116. fi
  1117. if [ $(zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}') -gt "0" ]; then
  1118. echo "sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  1119. echo errorcheck >> ${cmd}
  1120. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  1121. fi
  1122.  
  1123. echo "echo \"\`date\`: Local cleanup complete on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1124. echo "echo \"\" >> $log" >> ${cmd}
  1125. echo "" >> ${cmd}
  1126. }
  1127.  
  1128. r_clean()
  1129. {
  1130. ##
  1131. ## Cleans remotely replicated file systems
  1132. ##
  1133.  
  1134. SSH="ssh"
  1135.  
  1136. if [ "${compress}" = "yes" ]; then
  1137. SSH="ssh -C"
  1138. fi
  1139.  
  1140. if [ ! -z ${port} ]; then
  1141. SSH="${SSH} -p ${port}"
  1142. else
  1143. SSH="${SSH} -p 22"
  1144. fi
  1145.  
  1146. if [ `ping -c 1 -W 1000 $rh | grep -oe "[0-9] packets received" | awk '{print $1}'` -eq "0" ]; then
  1147. echo "`date`: Remote cleanup aborted on \"$lfs\"! No response from \"$rh\"." >> $log
  1148. echo "" >> $log
  1149. rm ${pid}
  1150. exit 1
  1151. elif zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  1152. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  1153. echo "" >> $log
  1154. rm ${pid}
  1155. exit 1
  1156. elif su replicator -c "${SSH} $rh zpool status $rp | grep \"scan:\" | egrep -qo '(scrub in progress|resilver in progress)'"; then
  1157. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  1158. echo "" >> $log
  1159. rm ${pid}
  1160. exit 1
  1161. fi
  1162.  
  1163. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//"`
  1164.  
  1165. if [ "$r" = "yes" ]; then
  1166. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  1167. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  1168. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base")
  1169. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta")
  1170. SNAPSHOT="zfs snapshot -r"
  1171. SEND="zfs send -R"
  1172. DESTROY="zfs destroy -r"
  1173. RENAME="zfs rename -r"
  1174. else
  1175. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  1176. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  1177. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}'")
  1178. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}'")
  1179. SNAPSHOT="zfs snapshot"
  1180. SEND="zfs send -p"
  1181. DESTROY="zfs destroy"
  1182. RENAME="zfs rename"
  1183. fi
  1184.  
  1185. echo "echo \"\`date\`: Beginning remote cleaning process on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1186.  
  1187. if [ ${SOURCEBASE} -gt "0" ]; then
  1188. echo "sudo ${DESTROY} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  1189. echo errorcheck >> ${cmd}
  1190. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  1191. fi
  1192. if [ ${SOURCEDELTA} -gt "0" ]; then
  1193. echo "sudo ${DESTROY} $lfs$r_new 2>> ${ERR}" >> ${cmd}
  1194. echo errorcheck >> ${cmd}
  1195. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  1196. fi
  1197. if [ ${DESTBASE} -gt "0" ]; then
  1198. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}" >> ${cmd}
  1199. echo errorcheck >> ${cmd}
  1200. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  1201. fi
  1202. if [ ${DESTDELTA} -gt "0" ]; then
  1203. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_new 2>> ${ERR}" >> ${cmd}
  1204. echo errorcheck >> ${cmd}
  1205. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  1206. fi
  1207. if [ "$r" != "yes" ]; then
  1208. cat >> ${cmd} << EOF
  1209. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  1210. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new"); do
  1211. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  1212. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  1213. errorcheck
  1214. fi
  1215. done
  1216. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  1217. fi
  1218. EOF
  1219. fi
  1220. if [ $(su replicator -c "${SSH} $rh zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}'") -gt "0" ]; then
  1221. echo "${SSH} $rh sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  1222. echo errorcheck >> ${cmd}
  1223. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  1224. fi
  1225.  
  1226. echo "echo \"\`date\`: Remote cleanup complete on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1227. echo "echo \"\" >> $log" >> ${cmd}
  1228. echo "" >> ${cmd}
  1229. }
  1230.  
  1231. errorcheck()
  1232. {
  1233. if [ "$?" -ne "0" ]; then
  1234. tail -2 ${ERR} >> $log
  1235. tail -10 $log > ${mail}
  1236. mail -s "$subject" $address < ${mail}
  1237. cat /dev/null > ${mail}
  1238. exit 1
  1239. fi
  1240. }
  1241.  
  1242. usenonecipher()
  1243. {
  1244. sed -i '' -e "s/${SSH} $rh sudo zfs recv/${SSH} -oNoneEnabled=yes -oNoneSwitch=yes $rh sudo zfs recv/" ${cmd}
  1245. }
  1246.  
  1247. sanity()
  1248. {
  1249. if [ -e $pid ]; then
  1250. CMD_BASE=$(echo ${cmd} | sed -E 's/\.[[:alnum:]]*$/\./')
  1251.  
  1252. # Since replicate has fired again (that is why it is running now), it has
  1253. # created a new job for _this_ run. So when searching for latest job, we
  1254. # scroll back two steps, instead of just one, which we already know is going
  1255. # to be empty any way:
  1256.  
  1257. LAST_RUN_JOB=$(ls -lU -D %y-%m-%d_%H-%M-%S ${CMD_BASE}* | egrep -o "([0-9]|\-|\_){17}.*$" | sort -n -t '_' -k 1,8 | tail -2 | head -1 | cut -d ' ' -f 2)
  1258. COMMANDS_IN_JOB=$(sed -E -e 's/ \$\{.*\}//' -e 's/^\ {4}if \[ "\$\(//' -e 's# 2>/dev/null.*$##' ${LAST_RUN_JOB} | egrep -v '^(echo|errorcheck|exit|if|fi|for|done|^#|^$)|^\ (\{|\})|^\ {2}(echo|errorcheck|if|fi|for|done)|^\ {4}(tail|mail|cat|exit|errorcheck|if|fi)|^\ {6}errorcheck' | tr '|' '\n' | sed -E -e 's/^\ *//' -e 's/ 2>>.*$//' | tr '\n' '|' | sed 's/|$//')
  1259. if [ $(echo "${COMMANDS_IN_JOB}" | wc -w) -le 0 ]; then
  1260. for JOB in $(ls -lU -D %y-%m-%d_%H-%M-%S ${CMD_BASE}* | egrep -o "([0-9]|\-|\_){17}.*$" | sort -r -n -t '_' -k 1,8 | cut -d ' ' -f 2); do
  1261. COMMANDS_IN_JOB=$(sed -E -e 's/ \$\{.*\}//' -e 's/^\ {4}if \[ "\$\(//' -e 's# 2>/dev/null.*$##' ${JOB} | egrep -v '^(echo|errorcheck|exit|if|fi|for|done|^#|^$)|^\ (\{|\})|^\ {2}(echo|errorcheck|if|fi|for|done)|^\ {4}(tail|mail|cat|exit|errorcheck|if|fi)|^\ {6}errorcheck' | tr '|' '\n' | sed -E -e 's/^\ *//' -e 's/ 2>>.*$//' | tr '\n' '|' | sed 's/|$//')
  1262. if [ $(echo "${COMMANDS_IN_JOB}" | wc -w) -ne 0 ]; then
  1263. LAST_RUN_JOB=${JOB}
  1264. break
  1265. else
  1266. LAST_RUN_JOB=""
  1267. fi
  1268. done
  1269. fi
  1270. replicate_cleanup() {
  1271. REPLICATE_PID_LOOP=$(printf "${REPLICATE_PROCESSES}" | awk '{print$1}')
  1272. for REPLICATE_PID in ${REPLICATE_PID_LOOP}; do
  1273. USED_CPU=$(ps -a -x -o pid,%cpu,command | grep "${REPLICATE_PID}" | grep -v grep | awk '{sub(/\./,"");print$2}')
  1274. if [ $(echo "${USED_CPU}" | wc -w) -ne 0 ]; then
  1275.  
  1276. # OK, so there is at least a process there:
  1277.  
  1278. if [ $(printf "${USED_CPU}" | wc -l) -gt 1 ]; then
  1279. USED_CPU=$(printf "${USED_CPU}" | awk '{if($1>a)a=$1};{if(a=="")a="00"};END{print a}')
  1280. fi
  1281. else
  1282. USED_CPU="00"
  1283. fi
  1284. if [ "${USED_CPU}" = "00" ]; then
  1285. USED_CPU="0"
  1286. else
  1287. USED_CPU=$(echo "${USED_CPU}" | sed 's/0//g')
  1288. fi
  1289. if [ ${USED_CPU} -ne 0 ]; then
  1290. RUNNING_PROCESSES="${RUNNING_PROCESSES} ${REPLICATE_PID}"
  1291. else
  1292. UNKNOWN_STATUS_PROCESSES="${UNKNOWN_STATUS_PROCESSES} ${REPLICATE_PID}"
  1293. fi
  1294. done
  1295. if [ $(echo "${RUNNING_PROCESSES}" | wc -w) -ne 0 ]; then
  1296. echo "$(date): Started a new process, but previous \"replicate\" processes are still running, aborting." >> $log
  1297. echo "" >> $log
  1298. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1299. else
  1300. if [ $(echo "${UNKNOWN_STATUS_PROCESSES}" | wc -w) -ne 0 ]; then
  1301. for UNKNOWN_STATUS_PROCESS in ${UNKNOWN_STATUS_PROCESSES}; do
  1302. if [ $(ps ax | grep "${UNKNOWN_STATUS_PROCESS}" | grep -v grep | egrep -c ' D.* ') -ne 0 ]; then
  1303. ERROR_MESSAGE="The previous replicate command set has been found with no running processes and seems to be stuck. A process from the last run has been found in \"D\" state which can not be killed, reboot required"
  1304. echo "${ERROR_MESSAGE}" >> ${mail}
  1305. echo "$(ps ax | grep "${UNKNOWN_STATUS_PROCESS}" | grep -v grep | egrep ' D.* ')" >> ${mail}
  1306. mail -s "$subject" $address < ${mail}
  1307. echo "$(date): ${ERROR_MESSAGE}" >> $log
  1308. echo "" >> $log
  1309. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1310. else
  1311. kill ${UNKNOWN_STATUS_PROCESS}
  1312. echo "$(date): No process from previous replicate run found running. Killed idle process \"${UNKNOWN_STATUS_PROCESS}\" as part of the cleanup before trying to start again." >> $log
  1313. echo "" >> $log
  1314. fi
  1315. done
  1316. else
  1317. echo "$(date): No idle or running processes from previous replicate run found running." >> $log
  1318. echo "" >> $log
  1319. fi
  1320. fi
  1321. }
  1322. confirm_activity() {
  1323.  
  1324. while [ "$#" -gt "0" ]; do
  1325. case "$1" in
  1326. "-r")
  1327. MODE="$1"
  1328. ;;
  1329. "-d")
  1330. DESTROYING="$1"
  1331. ;;
  1332. *)
  1333. echo "Unknown argument '$1'" >&2
  1334. ;;
  1335. esac
  1336. shift
  1337. done
  1338.  
  1339. if [ "${MODE}" = "-r" ]; then
  1340. if [ "${DESTROYING}" = "-d" ]; then
  1341. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | egrep 'zfs destroy|zfs list' | awk '{sub(/\./,"");print$2}')
  1342. else
  1343. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | grep 'zfs recv' | awk '{sub(/\./,"");print$2}')
  1344. fi
  1345. else
  1346. if [ "${DESTROYING}" = "-d" ]; then
  1347. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | egrep 'zfs destroy|zfs list' | awk '{sub(/\./,"");print$2}')
  1348. else
  1349. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | grep 'zfs recv' | awk '{sub(/\./,"");print$2}')
  1350. fi
  1351. fi
  1352. if [ $(echo "${USED_CPU}" | wc -w) -ne 0 ]; then
  1353.  
  1354. # OK, so there is at least a process there:
  1355.  
  1356. if [ $(printf "${USED_CPU}" | wc -l) -gt 1 ]; then
  1357. USED_CPU=$(printf "${USED_CPU}" | awk '{if($1>a)a=$1};{if(a=="")a="00"};END{print a}')
  1358. fi
  1359. else
  1360. USED_CPU="00"
  1361. fi
  1362. if [ ${USED_CPU} = "00" ]; then
  1363. USED_CPU="0"
  1364. else
  1365. USED_CPU=$(echo "${USED_CPU}" | sed 's/0//g')
  1366. fi
  1367. if [ ${USED_CPU} -ne 0 ]; then
  1368.  
  1369. # Found running process:
  1370.  
  1371. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1372. echo "" >> $log
  1373. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1374. else
  1375. if [ "${MODE}" = "-r" ]; then
  1376.  
  1377. # Found 'zfs recv' process but considered idle. Need to check if
  1378. # still active on receiving end.
  1379.  
  1380. SSH="ssh"
  1381. if [ ! -z ${port} ]; then
  1382. SSH="${SSH} -p ${port}"
  1383. else
  1384. SSH="${SSH} -p 22"
  1385. fi
  1386. REPID="$(awk -F':' '{print $6}' /etc/passwd | grep replicator)/.ssh/id_rsa"
  1387. REMOTE_REPLICATE_COMMANDS=$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep 'ssh' | sed -E -e "s/${SSH} ${rh}//" -e 's/.* sudo//' -e 's/^ //' | tr '\n' '|' | sed -E "s/\|$//")
  1388. REMOTE_REPLICATE_PROCESSES=$(${SSH} -i ${REPID} replicator@$rh ps -a -x -o pid,%cpu,command | egrep "zfs send -vRi|zfs recv -vdn|${REMOTE_REPLICATE_COMMANDS}" | egrep -v 'egrep| sudo ')
  1389. if [ "${DESTROYING}" = "-d" ]; then
  1390. USED_CPU=$(printf "${REMOTE_REPLICATE_PROCESSES}" | egrep 'zfs destroy|zfs list' | awk '{sub(/\./,"");print$2}')
  1391. else
  1392. USED_CPU=$(printf "${REMOTE_REPLICATE_PROCESSES}" | grep 'zfs recv' | awk '{sub(/\./,"");print$2}')
  1393. fi
  1394. if [ $(echo "${USED_CPU}" | wc -w) -ne 0 ]; then
  1395.  
  1396. # OK, so there is at least a process there:
  1397.  
  1398. if [ $(printf "${USED_CPU}" | wc -l) -gt 1 ]; then
  1399. USED_CPU=$(printf "${USED_CPU}" | awk '{if($1>a)a=$1};{if(a=="")a="00"};END{print a}')
  1400. fi
  1401. else
  1402. USED_CPU="00"
  1403. fi
  1404. if [ ${USED_CPU} = "00" ]; then
  1405. USED_CPU="0"
  1406. else
  1407. USED_CPU=$(echo "${USED_CPU}" | sed 's/0//g')
  1408. fi
  1409. if [ ${USED_CPU} -ne 0 ]; then
  1410.  
  1411. # Found running process:
  1412.  
  1413. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1414. echo "" >> $log
  1415. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1416. else
  1417.  
  1418. # But for crying out loud, it isn't using any CPU time either?
  1419. # OK, what is left to check then, is the size changing?
  1420.  
  1421. LAST_RUN_FILESYSTEM=$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep 'zfs send' | awk '{print $NF}' | cut -d '@' -f 1)
  1422. TARGET_FILESYSTEM=$(echo ${LAST_RUN_FILESYSTEM} | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//")
  1423. check_target_size() {
  1424. ${SSH} -i ${REPID} replicator@$rh zfs get -H -p -o value used ${TARGET_FILESYSTEM}
  1425. }
  1426. SIZE_BEFORE=$(check_target_size)
  1427. sleep 120
  1428. SIZE_AFTER=$(check_target_size)
  1429. if [ "${SIZE_BEFORE}" -ne "${SIZE_AFTER}" ]; then
  1430.  
  1431. # Finally, a hit! Then it should at least be doing something.
  1432.  
  1433. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1434. echo "" >> $log
  1435. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1436. else
  1437. if [ "${DESTROYING}" = "-d" ]; then
  1438.  
  1439. # Hmm, so size hasn't changed on the filesystem either, jeez... OK, last check; is the number of snapshots changing?
  1440.  
  1441. check_number_of_snapshots() {
  1442. ${SSH} -i ${REPID} replicator@$rh zfs list -H -t snapshot -o name -r ${TARGET_FILESYSTEM} 2>/dev/null | awk 'END{print NR}'
  1443. }
  1444. SNAPSHOTS_NUMBER_BEFORE=$(check_number_of_snapshots)
  1445. sleep 120
  1446. SNAPSHOTS_NUMBER_AFTER=$(check_number_of_snapshots)
  1447. if [ "${SNAPSHOTS_NUMBER_BEFORE}" -gt "${SNAPSHOTS_NUMBER_AFTER}" ]; then
  1448.  
  1449. # Epic win! So it's probably removing snapshots, good grief what a hassle:)
  1450.  
  1451. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1452. echo "" >> $log
  1453. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1454. else
  1455. replicate_cleanup
  1456. fi
  1457. else
  1458. replicate_cleanup
  1459. fi
  1460. fi
  1461. fi
  1462. else
  1463.  
  1464. # But for crying out loud, it isn't using any CPU time either?
  1465. # OK, what is left to check then, is the size changing?
  1466.  
  1467. LAST_RUN_FILESYSTEM=$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep 'zfs send' | awk '{print $NF}' | cut -d '@' -f 1)
  1468. TARGET_FILESYSTEM=$(echo ${LAST_RUN_FILESYSTEM} | cut -f 2-512 -d / | sed "s/^/$lp\//")
  1469. check_target_size() {
  1470. zfs get -H -p -o value used ${TARGET_FILESYSTEM}
  1471. }
  1472. SIZE_BEFORE=$(check_target_size)
  1473. sleep 120
  1474. SIZE_AFTER=$(check_target_size)
  1475. if [ "${SIZE_BEFORE}" -ne "${SIZE_AFTER}" ]; then
  1476.  
  1477. # Finally, a hit! Then it should at least be doing something.
  1478.  
  1479. echo "$(date): Started a new process, but the previous local \"replicate\" is still running, aborting." >> $log
  1480. echo "" >> $log
  1481. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1482. else
  1483. if [ "${DESTROYING}" = "-d" ]; then
  1484.  
  1485. # Hmm, so size hasn't changed on the filesystem either, jeez... OK, last check; is the number of snapshots changing?
  1486.  
  1487. check_number_of_snapshots() {
  1488. zfs list -H -t snapshot -o name -r ${TARGET_FILESYSTEM} 2>/dev/null | awk 'END{print NR}'
  1489. }
  1490. SNAPSHOTS_NUMBER_BEFORE=$(check_number_of_snapshots)
  1491. sleep 120
  1492. SNAPSHOTS_NUMBER_AFTER=$(check_number_of_snapshots)
  1493. if [ "${SNAPSHOTS_NUMBER_BEFORE}" -gt "${SNAPSHOTS_NUMBER_AFTER}" ]; then
  1494.  
  1495. # Epic win! So it's probably removing snapshots, good grief what a hassle:)
  1496.  
  1497. echo "$(date): Started a new process, but the previous local \"replicate\" is still running, aborting." >> $log
  1498. echo "" >> $log
  1499. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1500. else
  1501. replicate_cleanup
  1502. fi
  1503. else
  1504. replicate_cleanup
  1505. fi
  1506. fi
  1507. fi
  1508. fi
  1509. }
  1510. confirm_active_recv() {
  1511. if [ ! -z ${rh} ]; then
  1512.  
  1513. # Means it is a remote replication job:
  1514.  
  1515. if [ $(printf "${REPLICATE_PROCESSES}" | grep -c 'ssh') -ne 0 ]; then
  1516.  
  1517. # Found active 'ssh' transfer:
  1518.  
  1519. if [ $(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | grep -c 'zfs recv') -ne 0 ]; then
  1520.  
  1521. # Found active 'zfs recv':
  1522.  
  1523. confirm_activity -r
  1524. else
  1525.  
  1526. # No active 'zfs recv' found running. Is it destroying?
  1527.  
  1528. if [ $(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | egrep -c 'zfs destroy|zfs list') -ne 0 ]; then
  1529.  
  1530. # It's supposed to be destroying snapshots at the moment. Let's make
  1531. # sure:
  1532.  
  1533. confirm_activity -r -d
  1534. else
  1535. replicate_cleanup
  1536. fi
  1537. fi
  1538. else
  1539.  
  1540. # No active 'ssh' transfer found running when set as remote:
  1541.  
  1542. replicate_cleanup
  1543. fi
  1544. else
  1545.  
  1546. # If not remote, then it is a local replication job:
  1547.  
  1548. if [ $(printf "${REPLICATE_PROCESSES}" | grep -c 'zfs recv') -ne 0 ]; then
  1549.  
  1550. # Found active 'zfs recv':
  1551.  
  1552. confirm_activity
  1553. else
  1554.  
  1555. # No active 'zfs recv' found running. Is it destroying?
  1556.  
  1557. if [ $(printf "${REPLICATE_PROCESSES}" | egrep -c 'zfs destroy|zfs list') -ne 0 ]; then
  1558.  
  1559. # It's supposed to be destroying snapshots at the moment. Let's make
  1560. # sure:
  1561.  
  1562. confirm_activity -d
  1563. else
  1564. replicate_cleanup
  1565. fi
  1566. fi
  1567. fi
  1568. }
  1569. confirm_replicate_status() {
  1570. if [ $(printf "${REPLICATE_PROCESSES}" | wc -w) -ne 0 ]; then
  1571. if [ $(printf "${REPLICATE_PROCESSES}" | egrep -c 'zfs send|zfs recv|zfs destroy|zfs list') -ne 0 ]; then
  1572. if [ $(printf "${REPLICATE_PROCESSES}" | grep -c 'zfs send') -ne 0 ]; then
  1573.  
  1574. # Found active 'zfs send':
  1575.  
  1576. confirm_active_recv
  1577. else
  1578.  
  1579. # No active 'zfs send' found running:
  1580.  
  1581. confirm_active_recv
  1582. fi
  1583. else
  1584.  
  1585. # No active 'zfs send' or 'zfs recv' found running. If remote, it
  1586. # might still be working on the receiving end.
  1587.  
  1588. if [ "$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep -c 'ssh')" -ne "0" ]; then
  1589. confirm_activity -r -d
  1590. else
  1591. replicate_cleanup
  1592. fi
  1593. fi
  1594. else
  1595.  
  1596. # No processes left running. Starting new job.
  1597.  
  1598. :
  1599. fi
  1600. }
  1601. if [ $(echo ${LAST_RUN_JOB} | wc -w) -ne 0 ]; then
  1602. if [ $(ps ax | egrep "${LAST_RUN_JOB}" | grep -v egrep -c) -ne 0 ]; then
  1603. REPLICATE_PROCESSES=$(ps -a -x -o pid,%cpu,command | egrep "zfs send -vRi|zfs recv -vdn|${LAST_RUN_JOB}|${COMMANDS_IN_JOB}" | grep -v egrep)
  1604. confirm_replicate_status
  1605. else
  1606.  
  1607. # The last known job to have commands is not running any more:
  1608.  
  1609. replicate_cleanup
  1610. fi
  1611. else
  1612.  
  1613. # No previous job found with valid commands. Retrying with broadened
  1614. # search criteria:
  1615.  
  1616. REPLICATE_PROCESSES=$(ps -a -x -o pid,%cpu,command | egrep 'zfs send|zfs recv|zfs snapshot|zfs rename|zfs destroy|zfs list' | egrep -v 'egrep|zfSnap')
  1617. confirm_replicate_status
  1618. fi
  1619. fi
  1620. chown replicator ${mail}; chmod 600 ${mail}
  1621. chown replicator ${ERR}; chmod 600 ${ERR}
  1622. chown replicator ${cmd}; chmod 700 ${cmd}
  1623. cat > ${cmd} << EOF
  1624. #!/bin/sh
  1625. errorcheck()
  1626. {
  1627. if [ "\$?" -ne "0" ]; then
  1628. tail -2 ${ERR} >> $log
  1629. tail -10 $log > ${mail}
  1630. mail -s "$subject" $address < ${mail}
  1631. cat /dev/null > ${mail}
  1632. exit 1
  1633. fi
  1634. }
  1635.  
  1636. EOF
  1637. }
  1638.  
  1639. autoclean()
  1640. {
  1641. ##
  1642. ## Find and destroy all automatically taken snapshots
  1643. ##
  1644.  
  1645. zfs list -t snapshot -o name | grep auto > ${cmd}
  1646. cat ${cmd}
  1647. sed -i '' 's/^/sudo zfs destroy /' ${cmd}
  1648. }
  1649.  
  1650. allow()
  1651. {
  1652. sudoers="/usr/local/etc/sudoers"
  1653.  
  1654. if [ `grep "ssh" ${cmd} | wc -l` -gt "0" ]
  1655. then
  1656. allows=`grep zfs ${cmd} | cut -f 1 -d ">" | tr '|' '\n' | sed -e 's/^ //' -e 's/ 2$//' | sed -e 's/sudo //' -e 's/^ //' -e 's/^/replicator ALL=(ALL) NOPASSWD: \/sbin\//' | grep -v ssh`
  1657. else
  1658. allows=`grep zfs ${cmd} | cut -f 1 -d ">" | tr '|' '\n' | sed -e 's/^ //' -e 's/ 2$//' | sed -e 's/sudo //' -e 's/^ //' -e 's/^/replicator ALL=(ALL) NOPASSWD: \/sbin\//'`
  1659. fi
  1660. if [ `grep "replicator" $sudoers | wc -l` -gt "0" ]; then
  1661. sed -i '' '/replicator/d' $sudoers
  1662. fi
  1663. echo "$allows" >> $sudoers
  1664. }
  1665.  
  1666. disallow()
  1667. {
  1668. if [ `grep "replicator" $sudoers | wc -l` -gt "0" ]; then
  1669. sed -i '' '/replicator/d' $sudoers
  1670. fi
  1671. }
  1672.  
  1673. ##
  1674. ## Initial preparation
  1675. ##
  1676.  
  1677. case "$1" in
  1678.  
  1679. "")
  1680. ##
  1681. ## If no jobname is specified, it will find all jobs in config directory and
  1682. ## process them one by one
  1683. ##
  1684.  
  1685. for job in $jobs
  1686. do
  1687. . $job
  1688. sanity
  1689. touch $pid
  1690. if [ -z "$rh" ]
  1691. then
  1692. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @local_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1693. then
  1694. l_inc
  1695. else
  1696. l_base
  1697. fi
  1698. else
  1699. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @remote_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1700. then
  1701. r_inc
  1702. if [ "$none" = "yes" ]; then
  1703. usenonecipher
  1704. fi
  1705. else
  1706. r_base
  1707. if [ "$none" = "yes" ]; then
  1708. usenonecipher
  1709. fi
  1710. fi
  1711. fi
  1712. allow
  1713. su replicator -c ${cmd}
  1714. disallow
  1715. rm $pid
  1716. done
  1717. ;;
  1718.  
  1719. -c)
  1720. ##
  1721. ## When cleaning, if no jobname is specified, it will find all jobs in
  1722. ## config directory and process them one by one
  1723. ##
  1724.  
  1725. if [ "$2" = "auto" ]
  1726. then
  1727. else
  1728. echo ""
  1729. echo "Starting off by deleting replication snapshot(s) and cleaning the entire"
  1730. echo "target pool zfs filesystem. This can take a while, depending on how much"
  1731. echo "is stored."
  1732. sleep 10
  1733. fi
  1734.  
  1735. if [ "$2" = "" ]; then
  1736. for job in `find $path -type f ! -iname sample_* ! -iname noauto_* | sort -rd`
  1737. do
  1738. . $job
  1739. sanity
  1740. touch $pid
  1741. if [ -z "$rh" ]
  1742. then
  1743. l_clean
  1744. else
  1745. r_clean
  1746. fi
  1747. allow
  1748. su replicator -c ${cmd}
  1749. disallow
  1750. rm $pid
  1751. done
  1752. exit
  1753. fi
  1754.  
  1755. ##
  1756. ## Also when cleaning, if you type in shorthand jobname, it will only clean
  1757. ## specified job
  1758. ##
  1759.  
  1760. if [ -e ${path}$2 ]; then
  1761. . $path$2
  1762. sanity
  1763. touch $pid
  1764. if [ -z "$rh" ]
  1765. then
  1766. l_clean
  1767. else
  1768. r_clean
  1769. fi
  1770. allow
  1771. su replicator -c ${cmd}
  1772. disallow
  1773. rm $pid
  1774. else
  1775.  
  1776. ##
  1777. ## If you type "auto" when cleaning, it finds and destroys all snapshots with
  1778. ## the word "auto" in them.
  1779. ##
  1780.  
  1781. if [ "$2" = "auto" ]
  1782. then
  1783. sanity
  1784. touch $pid
  1785. autoclean
  1786. while true
  1787. do
  1788. read -r -p 'These snapshot(s) will destroyed. Confirm? "Y|n"' choice
  1789. case "$choice" in
  1790. y|Y) allow
  1791. su replicator -c ${cmd}
  1792. disallow
  1793. rm $pid
  1794. check=`zfs list -t snapshot`
  1795. result="no datasets available"
  1796. if [ "$check" = "$result" ]; then
  1797. echo "Seems clean enough"
  1798. else
  1799. echo "Seems there are snaps left:"
  1800. zfs list -t snapshot
  1801. fi; break ;;
  1802. n|N) echo "Suit yourself..." ; break ;;
  1803. *) echo "Suit yourself..." ; break ;;
  1804. esac
  1805. done
  1806. else
  1807. echo ""
  1808. echo "### Error! ###"
  1809. echo "Jobname \"$2\" does not exist"
  1810. usage
  1811. fi
  1812. fi
  1813. ;;
  1814.  
  1815. -h)
  1816. usage
  1817. ;;
  1818.  
  1819. *)
  1820. ##
  1821. ## If you type in shorthand jobname, it will only process specified job
  1822. ##
  1823.  
  1824. if [ -e ${path}$1 ]
  1825. then
  1826. . $path$1
  1827. sanity
  1828. touch $pid
  1829. if [ -z "$rh" ]
  1830. then
  1831. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @local_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1832. then
  1833. l_inc
  1834. else
  1835. l_base
  1836. fi
  1837. else
  1838. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @remote_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1839. then
  1840. r_inc
  1841. if [ "$none" = "yes" ]; then
  1842. usenonecipher
  1843. fi
  1844. else
  1845. r_base
  1846. if [ "$none" = "yes" ]; then
  1847. usenonecipher
  1848. fi
  1849. fi
  1850. fi
  1851. allow
  1852. su replicator -c ${cmd}
  1853. disallow
  1854. rm $pid
  1855. else
  1856. echo ""
  1857. echo "### Error! ###"
  1858. echo "Jobname \"$1\" does not exist"
  1859. usage
  1860. fi
  1861. ;;
  1862.  
  1863. esac
  1864.  
  1865. ##
  1866. ## Final cleanup
  1867. ##
  1868.  
  1869. rm -f /tmp/replicate.*
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement