Guest User

replicate

a guest
Apr 16th, 2015
812
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 67.41 KB | None | 0 0
  1. #!/bin/sh
  2.  
  3. ##
  4. ## Initial and continous ZFS filesystems replication
  5. ##
  6.  
  7. PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
  8.  
  9. if [ ! `whoami` = "root" ]; then
  10. echo "You need to be root."
  11. exit
  12. fi
  13.  
  14.  
  15. l_old="@local_replicate.base"
  16. r_old="@remote_replicate.base"
  17. l_new="@local_replicate.delta"
  18. r_new="@remote_replicate.delta"
  19. log="/var/log/replicate.log"
  20. cmd=$(mktemp /tmp/replicate.job.XXXXXX)
  21. ERR=$(mktemp /tmp/replicate.err.XXXXXX)
  22. pid="/var/run/replicate.pid"
  23. path="/usr/local/etc/replicatejobs/"
  24. jobs=`find $path -type f ! -iname sample_* ! -iname noauto_* | sort -d`
  25.  
  26. ##
  27. ## Mail settings
  28. ##
  29. subject="A replication error has occurred!"
  30. mail=$(mktemp /tmp/replicate.mail.XXXXXX)
  31.  
  32. usage()
  33. {
  34. echo ""
  35. echo "Usage: `basename $0` [Options] [Flags] [Job]"
  36. echo ""
  37. echo "Options: -c|-h"
  38. echo "-c: Clean."
  39. echo " removes replicated filesystems and snapshot(s)."
  40. echo "-h: Usage."
  41. echo ""
  42. echo "Flags: auto"
  43. echo "auto: Clean auto."
  44. echo " removes all scheduled snapshot(s)."
  45. echo ""
  46. echo "Examples: `basename $0` | `basename $0` jobname"
  47. echo " `basename $0` -c | `basename $0` -c jobname | `basename $0` -c auto"
  48. echo ""
  49. }
  50.  
  51. l_base()
  52. {
  53. ##
  54. ## Makes local baseline replication
  55. ##
  56.  
  57. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  58. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  59. echo "" >> $log
  60. rm ${pid}
  61. exit 1
  62. elif zpool status $lp | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  63. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  64. echo "" >> $log
  65. rm ${pid}
  66. exit 1
  67. fi
  68.  
  69. if [ "$r" = "yes" ]
  70. then
  71. SNAPSHOT="zfs snapshot -r"
  72. SEND="zfs send -R"
  73. else
  74. SNAPSHOT="zfs snapshot"
  75. SEND="zfs send -p"
  76. fi
  77.  
  78. tfs=$(echo $lfs | cut -f 2-512 -d / | sed "s/^/$lp\//")
  79.  
  80. echo "echo \"\`date\`: Beginning local baseline replication sequence on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  81.  
  82. # Take initial snapshot(s):
  83. echo "sudo ${SNAPSHOT} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  84. echo errorcheck >> ${cmd}
  85. echo "echo \"\`date\`: Initial snapshot(s) created\" >> $log" >> ${cmd}
  86.  
  87. # Replicate data:
  88. echo "sudo ${SEND} $lfs$l_old 2>> ${ERR} | sudo zfs recv -du $lp 2>> ${ERR}" >> ${cmd}
  89. echo errorcheck >> ${cmd}
  90. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  91.  
  92. # Delete unwanted, scheduled source snapshots from target:
  93. if [ "$r" = "yes" ]; then
  94. cat >> ${cmd} << EOF
  95. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  96. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$l_old|$l_new"); do
  97. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  98. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  99. errorcheck
  100. fi
  101. done
  102. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  103. fi
  104. EOF
  105. else
  106. cat >> ${cmd} << EOF
  107. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  108. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new"); do
  109. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  110. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  111. errorcheck
  112. fi
  113. done
  114. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  115. fi
  116. EOF
  117. fi
  118.  
  119. echo "echo \"\`date\`: Local baseline replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  120. echo "echo \"\" >> $log" >> ${cmd}
  121. echo "exit 0" >> ${cmd}
  122. echo "" >> ${cmd}
  123. }
  124.  
  125. r_base()
  126. {
  127. ##
  128. ## Makes remote baseline replication
  129. ##
  130.  
  131. SSH="ssh"
  132.  
  133. if [ "${compress}" = "yes" ]; then
  134. SSH="ssh -C"
  135. fi
  136.  
  137. if [ ! -z ${port} ]; then
  138. SSH="${SSH} -p ${port}"
  139. else
  140. SSH="${SSH} -p 22"
  141. fi
  142.  
  143. if [ `ping -c 1 -W 1000 $rh | grep -oe "[0-9] packets received" | awk '{print $1}'` -eq "0" ]; then
  144. echo "`date`: Remote baseline replication sequence aborted on \"$lfs\"! No response from \"$rh\"." >> $log
  145. echo "" >> $log
  146. rm ${pid}
  147. exit 1
  148. elif zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  149. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  150. echo "" >> $log
  151. rm ${pid}
  152. exit 1
  153. elif su replicator -c "${SSH} $rh zpool status $rp | grep \"scan:\" | egrep -qo '(scrub in progress|resilver in progress)'"; then
  154. echo "`date`: A Scrub or Resilver is currently in progress on remote pool, aborting." >> $log
  155. echo "" >> $log
  156. rm ${pid}
  157. exit 1
  158. fi
  159.  
  160. if [ "$r" = "yes" ]
  161. then
  162. SNAPSHOT="zfs snapshot -r"
  163. SEND="zfs send -R"
  164. else
  165. SNAPSHOT="zfs snapshot"
  166. SEND="zfs send -p"
  167. fi
  168.  
  169. tfs=$(echo $lfs | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//")
  170.  
  171. echo "echo \"\`date\`: Beginning remote baseline replication sequence on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  172.  
  173. # Take initial snapshot(s):
  174. echo "sudo ${SNAPSHOT} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  175. echo errorcheck >> ${cmd}
  176. echo "echo \"\`date\`: Initial snapshot(s) created\" >> $log" >> ${cmd}
  177.  
  178. # Replicate data:
  179. echo "sudo ${SEND} $lfs$r_old 2>> ${ERR} | ${SSH} $rh sudo zfs recv -du $rp/$lh 2>> ${ERR}" >> ${cmd}
  180. echo errorcheck >> ${cmd}
  181. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  182.  
  183. # Clean unwanted, scheduled source snapshots from target:
  184. if [ "$r" = "yes" ]; then
  185. cat >> ${cmd} << EOF
  186. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  187. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | egrep -v "$r_old|$r_new"); do
  188. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  189. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  190. errorcheck
  191. fi
  192. done
  193. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  194. fi
  195. EOF
  196. else
  197. cat >> ${cmd} << EOF
  198. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  199. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new"); do
  200. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  201. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  202. errorcheck
  203. fi
  204. done
  205. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  206. fi
  207. EOF
  208. fi
  209.  
  210. echo "echo \"\`date\`: Remote baseline replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  211. echo "echo \"\" >> $log" >> ${cmd}
  212. echo "exit 0" >> ${cmd}
  213. echo "" >> ${cmd}
  214. }
  215.  
  216. l_inc()
  217. {
  218. ##
  219. ## Makes local incremental replication
  220. ##
  221.  
  222. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  223. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  224. echo "" >> $log
  225. rm ${pid}
  226. exit 1
  227. elif zpool status $lp | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  228. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  229. echo "" >> $log
  230. rm ${pid}
  231. exit 1
  232. fi
  233.  
  234. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$lp\//"`
  235.  
  236. echo "`date`: Beginning local incremental replication sequence on \"$lfs\"" >> $log
  237.  
  238. # Take new source snapshot(s):
  239.  
  240. if [ "$r" = "yes" ]; then
  241. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  242. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  243. DESTBASE=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)
  244. DESTDELTA=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta)
  245. SNAPSHOT="zfs snapshot -r"
  246. SEND="zfs send -R"
  247. DESTROY="zfs destroy -r"
  248. RENAME="zfs rename -r"
  249. else
  250. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  251. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  252. DESTBASE=$(zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}')
  253. DESTDELTA=$(zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}')
  254. SNAPSHOT="zfs snapshot"
  255. SEND="zfs send -p"
  256. DESTROY="zfs destroy"
  257. RENAME="zfs rename"
  258. fi
  259.  
  260. rebaseline()
  261. {
  262. echo "echo \"\`date\`: No appropriate correctional steps where found, have to rebaseline\" >> $log" >> ${cmd}
  263. if [ ${SOURCEBASE} -gt "0" ]; then
  264. echo "sudo ${DESTROY} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  265. echo errorcheck >> ${cmd}
  266. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  267. fi
  268. if [ ${SOURCEDELTA} -gt "0" ]; then
  269. echo "sudo ${DESTROY} $lfs$l_new 2>> ${ERR}" >> ${cmd}
  270. echo errorcheck >> ${cmd}
  271. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  272. fi
  273. if [ ${DESTBASE} -gt "0" ]; then
  274. echo "sudo ${DESTROY} $tfs$l_old 2>> ${ERR}" >> ${cmd}
  275. echo errorcheck >> ${cmd}
  276. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  277. fi
  278. if [ ${DESTDELTA} -gt "0" ]; then
  279. echo "sudo ${DESTROY} $tfs$l_new 2>> ${ERR}" >> ${cmd}
  280. echo errorcheck >> ${cmd}
  281. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  282. fi
  283. if [ "$r" != "yes" ]; then
  284. cat >> ${cmd} << EOF
  285. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  286. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new"); do
  287. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  288. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  289. errorcheck
  290. fi
  291. done
  292. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  293. fi
  294. EOF
  295. fi
  296. if [ $(zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}') -gt "0" ]; then
  297. echo "sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  298. echo errorcheck >> ${cmd}
  299. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  300. fi
  301. echo "sudo ${SNAPSHOT} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  302. echo errorcheck >> ${cmd}
  303. echo "echo \"\`date\`: Base snapshot(s) created\" >> $log" >> ${cmd}
  304. echo "sudo ${SEND} $lfs$l_old 2>> ${ERR} | sudo zfs recv -du $lp 2>> ${ERR}" >> ${cmd}
  305. echo errorcheck >> ${cmd}
  306. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  307. echo "echo \"\`date\`: Rebaseline complete\" >> $log" >> ${cmd}
  308. echo "sudo ${SNAPSHOT} $lfs$l_new 2>> ${ERR}" >> ${cmd}
  309. echo errorcheck >> ${cmd}
  310. echo "echo \"\`date\`: Delta snapshot(s) created\" >> $log" >> ${cmd}
  311. }
  312.  
  313. dryrun()
  314. {
  315. DRYRUNOUT=$(mktemp /tmp/replicate.dryrun.XXXXXX)
  316.  
  317. if [ "$r" = "yes" ]
  318. then
  319. DODRYRUN=`zfs send -vRi $lfs$l_old $lfs$l_new 2> ${DRYRUNOUT} | zfs recv -vdn $lp 2>> ${DRYRUNOUT}`
  320. else
  321. DODRYRUN=`zfs send -vi $lfs$l_old $lfs$l_new 2> ${DRYRUNOUT} | zfs recv -vdn $lp 2>> ${DRYRUNOUT}`
  322. fi
  323.  
  324. DRYRUNOK=`egrep '(could not send|cannot receive)' ${DRYRUNOUT} | wc -l | sed 's/^[ \t]*//'`
  325. rm ${DRYRUNOUT}; echo ${DRYRUNOK}
  326. }
  327.  
  328. if [ ${SOURCEBASE} -gt "0" ]
  329. then
  330. if [ ${SOURCEDELTA} -gt "0" ]
  331. then
  332. if [ ${DESTBASE} -gt "0" ]
  333. then
  334. if [ ${DESTDELTA} -gt "0" ]
  335. then
  336. if [ ${DESTDELTA} = ${DESTBASE} ]
  337. then
  338. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  339. ${DESTROY} $tfs$l_old 2>> ${ERR}
  340. errorcheck
  341. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  342. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  343. errorcheck
  344. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  345. ${DESTROY} $lfs$l_old 2>> ${ERR}
  346. errorcheck
  347. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  348. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  349. errorcheck
  350. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  351. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  352. errorcheck
  353. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  354. if [ `dryrun` -gt "0" ]
  355. then
  356. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  357. rebaseline
  358. else
  359. echo "`date`: Dry run successful, OK to resend" >> $log
  360. fi
  361. else
  362. rebaseline
  363. fi
  364. else
  365. if [ `dryrun` -gt "0" ]
  366. then
  367. echo "`date`: Dry run unsuccessful" >> $log
  368. echo "`date`: Probably interrupted while destroying source .base(s)" >> $log
  369. ${DESTROY} $lfs$l_old 2>> ${ERR}
  370. errorcheck
  371. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  372. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  373. errorcheck
  374. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  375. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  376. errorcheck
  377. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  378. if [ `dryrun` -gt "0" ]
  379. then
  380. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  381. rebaseline
  382. else
  383. echo "`date`: Dry run successful, OK to resend" >> $log
  384. fi
  385. else
  386. echo "`date`: Dry run successful" >> $log
  387. echo "`date`: Probably interrupted while transferring, resending" >> $log
  388. fi
  389. fi
  390. else
  391. if [ ${DESTDELTA} -gt "0" ]
  392. then
  393. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  394. then
  395. echo "`date`: Probably interrupted while renaming target .delta to .base" >> $log
  396. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  397. errorcheck
  398. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  399. ${DESTROY} $lfs$l_old 2>> ${ERR}
  400. errorcheck
  401. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  402. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  403. errorcheck
  404. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  405. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  406. errorcheck
  407. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  408. if [ `dryrun` -gt "0" ]
  409. then
  410. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  411. rebaseline
  412. else
  413. echo "`date`: Dry run successful, OK to resend" >> $log
  414. fi
  415. else
  416. rebaseline
  417. fi
  418. else
  419. rebaseline
  420. fi
  421. fi
  422. else
  423. if [ ${DESTBASE} -gt "0" ]
  424. then
  425. if [ ${DESTDELTA} -gt "0" ]
  426. then
  427. ${DESTROY} $tfs$l_new 2>> ${ERR}
  428. errorcheck
  429. echo "`date`: Target .delta snapshot(s) found and destroyed" >> $log
  430. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  431. errorcheck
  432. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  433. else
  434. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  435. errorcheck
  436. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  437. if [ `dryrun` -gt "0" ]
  438. then
  439. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  440. rebaseline
  441. else
  442. echo "`date`: Dry run successful, OK to resend" >> $log
  443. fi
  444. fi
  445. else
  446. if [ ${DESTDELTA} -gt "0" ]
  447. then
  448. if [ ${SOURCEBASE} = ${DESTDELTA} ]
  449. then
  450. echo "`date`: No target .base found, but source .base matches target .delta. Renaming target .delta to .base" >> $log
  451. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  452. errorcheck
  453. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  454. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  455. errorcheck
  456. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  457. if [ `dryrun` -gt "0" ]
  458. then
  459. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  460. rebaseline
  461. else
  462. echo "`date`: Dry run successful, OK to resend" >> $log
  463. fi
  464. else
  465. rebaseline
  466. fi
  467. else
  468. rebaseline
  469. fi
  470. fi
  471. fi
  472. else
  473. if [ ${SOURCEDELTA} -gt "0" ]
  474. then
  475. if [ ${DESTBASE} -gt "0" ]
  476. then
  477. if [ ${DESTDELTA} -gt "0" ]
  478. then
  479. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  480. then
  481. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  482. ${DESTROY} $tfs$l_old 2>> ${ERR}
  483. errorcheck
  484. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  485. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  486. errorcheck
  487. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  488. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  489. errorcheck
  490. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  491. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  492. errorcheck
  493. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  494. if [ `dryrun` -gt "0" ]
  495. then
  496. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  497. rebaseline
  498. else
  499. echo "`date`: Dry run successful, OK to resend" >> $log
  500. fi
  501. else
  502. rebaseline
  503. fi
  504. else
  505. if [ ${SOURCEDELTA} = ${DESTBASE} ]
  506. then
  507. echo "`date`: Probably interrupted while renaming source .delta to .base" >> $log
  508. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  509. errorcheck
  510. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  511. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  512. errorcheck
  513. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  514. if [ `dryrun` -gt "0" ]
  515. then
  516. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  517. rebaseline
  518. else
  519. echo "`date`: Dry run successful, OK to resend" >> $log
  520. fi
  521. else
  522. rebaseline
  523. fi
  524. fi
  525. else
  526. if [ ${DESTDELTA} -gt "0" ]
  527. then
  528. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  529. then
  530. echo "`date`: No source or target .base, but source and target .delta matches. Can try to rename them to .base and test a resend"
  531. ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}
  532. errorcheck
  533. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  534. ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}
  535. errorcheck
  536. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  537. ${SNAPSHOT} $lfs$l_new 2>> ${ERR}
  538. errorcheck
  539. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  540. if [ `dryrun` -gt "0" ]
  541. then
  542. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  543. rebaseline
  544. else
  545. echo "`date`: Dry run successful, OK to resend" >> $log
  546. fi
  547. else
  548. rebaseline
  549. fi
  550. else
  551. rebaseline
  552. fi
  553. fi
  554. else
  555. if [ ${DESTBASE} -gt "0" ]
  556. then
  557. if [ ${DESTDELTA} -gt "0" ]
  558. then
  559. rebaseline
  560. else
  561. rebaseline
  562. fi
  563. else
  564. if [ ${DESTDELTA} -gt "0" ]
  565. then
  566. rebaseline
  567. else
  568. rebaseline
  569. fi
  570. fi
  571. fi
  572. fi
  573.  
  574. # Replicate data:
  575. echo "sudo ${SEND} -i $lfs$l_old $lfs$l_new 2>> ${ERR} | sudo zfs recv -du $lp 2>> ${ERR}" >> ${cmd}
  576. echo errorcheck >> ${cmd}
  577. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  578.  
  579. # Destroy target .base snapshot(s):
  580. if [ "$r" = "yes" ]; then
  581. cat >> ${cmd} << EOF
  582. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)" -gt "0" ]; then
  583. sudo ${DESTROY} $tfs$l_old 2>> ${ERR}
  584. errorcheck
  585. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  586. fi
  587. EOF
  588. else
  589. cat >> ${cmd} << EOF
  590. if [ "\$(zfs list -H -t snapshot -o name $tfs$l_old 2>/dev/null | awk 'END{print NR}')" -gt "0" ]; then
  591. sudo ${DESTROY} $tfs$l_old 2>> ${ERR}
  592. errorcheck
  593. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  594. fi
  595. EOF
  596. fi
  597.  
  598. # Rename target .delta snapshot(s) .old:
  599. echo "sudo ${RENAME} $tfs$l_new $tfs$l_old 2>> ${ERR}" >> ${cmd}
  600. echo errorcheck >> ${cmd}
  601. echo "echo \"\`date\`: Target .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  602.  
  603. # Destroy source .base snapshot(s):
  604. echo "sudo ${DESTROY} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  605. echo errorcheck >> ${cmd}
  606. echo "echo \"\`date\`: Source .base snapshot(s) destroyed\" >> $log" >> ${cmd}
  607.  
  608. # Rename source .delta snapshot(s) .base:
  609. echo "sudo ${RENAME} $lfs$l_new $lfs$l_old 2>> ${ERR}" >> ${cmd}
  610. echo errorcheck >> ${cmd}
  611. echo "echo \"\`date\`: Source .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  612.  
  613. echo "echo \"\`date\`: Local incremental replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  614. echo "echo \"\" >> $log" >> ${cmd}
  615. echo "exit 0" >> ${cmd}
  616. echo "" >> ${cmd}
  617. }
  618.  
  619. r_inc()
  620. {
  621. ##
  622. ## Makes remote incremental replication
  623. ##
  624.  
  625. SSH="ssh"
  626.  
  627. if [ "${compress}" = "yes" ]; then
  628. SSH="ssh -C"
  629. fi
  630.  
  631. if [ ! -z ${port} ]; then
  632. SSH="${SSH} -p ${port}"
  633. else
  634. SSH="${SSH} -p 22"
  635. fi
  636.  
  637. if [ `ping -c 1 -W 1000 $rh | grep -oe "[0-9] packets received" | awk '{print $1}'` -eq "0" ]; then
  638. echo "`date`: Remote incremental replication sequence aborted on \"$lfs\"! No response from \"$rh\"." >> $log
  639. echo "" >> $log
  640. rm ${pid}
  641. exit 1
  642. fi
  643. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  644. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  645. echo "" >> $log
  646. rm ${pid}
  647. exit 1
  648. elif su replicator -c "${SSH} $rh zpool status $rp | grep \"scan:\" | egrep -qo '(scrub in progress|resilver in progress)'"; then
  649. echo "`date`: A Scrub or Resilver is currently in progress on remote pool, aborting." >> $log
  650. echo "" >> $log
  651. rm ${pid}
  652. exit 1
  653. fi
  654.  
  655. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//"`
  656.  
  657. echo "`date`: Beginning remote incremental replication sequence on \"$lfs\"" >> $log
  658.  
  659. # Take new source snapshot(s):
  660.  
  661. REPID="`awk -F':' '{print $6}' /etc/passwd | grep replicator`/.ssh/id_rsa"
  662.  
  663. if [ "$r" = "yes" ]; then
  664. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  665. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  666. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base")
  667. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta")
  668. SNAPSHOT="zfs snapshot -r"
  669. SEND="zfs send -R"
  670. DESTROY="zfs destroy -r"
  671. RENAME="zfs rename -r"
  672. else
  673. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  674. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  675. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}'")
  676. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}'")
  677. SNAPSHOT="zfs snapshot"
  678. SEND="zfs send -p"
  679. DESTROY="zfs destroy"
  680. RENAME="zfs rename"
  681. fi
  682.  
  683. rebaseline()
  684. {
  685. echo "echo \"\`date\`: No appropriate correctional steps where found, have to rebaseline\" >> $log" >> ${cmd}
  686. if [ ${SOURCEBASE} -gt "0" ]; then
  687. echo "sudo ${DESTROY} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  688. echo errorcheck >> ${cmd}
  689. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  690. fi
  691. if [ ${SOURCEDELTA} -gt "0" ]; then
  692. echo "sudo ${DESTROY} $lfs$r_new 2>> ${ERR}" >> ${cmd}
  693. echo errorcheck >> ${cmd}
  694. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  695. fi
  696. if [ ${DESTBASE} -gt "0" ]; then
  697. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}" >> ${cmd}
  698. echo errorcheck >> ${cmd}
  699. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  700. fi
  701. if [ ${DESTDELTA} -gt "0" ]; then
  702. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_new 2>> ${ERR}" >> ${cmd}
  703. echo errorcheck >> ${cmd}
  704. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  705. fi
  706. if [ "$r" != "yes" ]; then
  707. cat >> ${cmd} << EOF
  708. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  709. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new"); do
  710. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  711. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  712. errorcheck
  713. fi
  714. done
  715. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  716. fi
  717. EOF
  718. fi
  719. if [ $(su replicator -c "${SSH} $rh zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}'") -gt "0" ]; then
  720. echo "${SSH} $rh sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  721. echo errorcheck >> ${cmd}
  722. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  723. fi
  724. echo "sudo ${SNAPSHOT} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  725. echo errorcheck >> ${cmd}
  726. echo "echo \"\`date\`: Base snapshot(s) created\" >> $log" >> ${cmd}
  727. echo "sudo ${SEND} $lfs$r_old 2>> ${ERR} | ${SSH} $rh sudo zfs recv -du $rp/$lh 2>> ${ERR}" >> ${cmd}
  728. echo errorcheck >> ${cmd}
  729. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  730. echo "echo \"\`date\`: Rebaseline complete\" >> $log" >> ${cmd}
  731. echo "sudo ${SNAPSHOT} $lfs$r_new 2>> ${ERR}" >> ${cmd}
  732. echo errorcheck >> ${cmd}
  733. echo "echo \"\`date\`: Delta snapshot(s) created\" >> $log" >> ${cmd}
  734. }
  735.  
  736. dryrun()
  737. {
  738. DRYRUNOUT=$(mktemp /tmp/replicate.dryrun.XXXXXX)
  739.  
  740. if [ "$r" = "yes" ]
  741. then
  742. DODRYRUN=`zfs send -vRi $lfs$r_old $lfs$r_new 2> ${DRYRUNOUT} | ${SSH} -i ${REPID} replicator@$rh sudo zfs recv -vdn $rp/$lh 2>> ${DRYRUNOUT}`
  743. else
  744. DODRYRUN=`zfs send -vi $lfs$r_old $lfs$r_new 2> ${DRYRUNOUT} | ${SSH} -i ${REPID} replicator@$rh sudo zfs recv -vdn $rp/$lh 2>> ${DRYRUNOUT}`
  745. fi
  746.  
  747. DRYRUNOK=`egrep '(could not send|cannot receive)' ${DRYRUNOUT} | wc -l | sed 's/^[ \t]*//'`
  748. rm ${DRYRUNOUT}; echo ${DRYRUNOK}
  749. }
  750.  
  751. if [ ${SOURCEBASE} -gt "0" ]
  752. then
  753. if [ ${SOURCEDELTA} -gt "0" ]
  754. then
  755. if [ ${DESTBASE} -gt "0" ]
  756. then
  757. if [ ${DESTDELTA} -gt "0" ]
  758. then
  759. if [ ${DESTDELTA} = ${DESTBASE} ]
  760. then
  761. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  762. ${SSH} -i ${REPID} replicator@$rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  763. errorcheck
  764. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  765. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  766. errorcheck
  767. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  768. ${DESTROY} $lfs$r_old 2>> ${ERR}
  769. errorcheck
  770. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  771. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  772. errorcheck
  773. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  774. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  775. errorcheck
  776. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  777. if [ `dryrun` -gt "0" ]
  778. then
  779. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  780. rebaseline
  781. else
  782. echo "`date`: Dry run successful, OK to resend" >> $log
  783. fi
  784. else
  785. rebaseline
  786. fi
  787. else
  788. if [ `dryrun` -gt "0" ]
  789. then
  790. echo "`date`: Dry run unsuccessful" >> $log
  791. echo "`date`: Probably interrupted while destroying source .base(s)" >> $log
  792. ${DESTROY} $lfs$r_old 2>> ${ERR}
  793. errorcheck
  794. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  795. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  796. errorcheck
  797. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  798. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  799. errorcheck
  800. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  801. if [ `dryrun` -gt "0" ]
  802. then
  803. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  804. rebaseline
  805. else
  806. echo "`date`: Dry run successful, OK to resend" >> $log
  807. fi
  808. else
  809. echo "`date`: Dry run successful" >> $log
  810. echo "`date`: Probably interrupted while transferring, resending" >> $log
  811. fi
  812. fi
  813. else
  814. if [ ${DESTDELTA} -gt "0" ]
  815. then
  816. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  817. then
  818. echo "`date`: Probably interrupted while renaming target .delta to .base" >> $log
  819. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  820. errorcheck
  821. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  822. ${DESTROY} $lfs$r_old 2>> ${ERR}
  823. errorcheck
  824. echo "`date`: Source .base snapshot(s) destroyed" >> $log
  825. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  826. errorcheck
  827. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  828. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  829. errorcheck
  830. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  831. if [ `dryrun` -gt "0" ]
  832. then
  833. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  834. rebaseline
  835. else
  836. echo "`date`: Dry run successful, OK to resend" >> $log
  837. fi
  838. else
  839. rebaseline
  840. fi
  841. else
  842. rebaseline
  843. fi
  844. fi
  845. else
  846. if [ ${DESTBASE} -gt "0" ]
  847. then
  848. if [ ${DESTDELTA} -gt "0" ]
  849. then
  850. ${SSH} -i ${REPID} replicator@$rh sudo ${DESTROY} $tfs$r_new 2>> ${ERR}
  851. errorcheck
  852. echo "`date`: Target .delta snapshot(s) found and destroyed" >> $log
  853. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  854. errorcheck
  855. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  856. else
  857. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  858. errorcheck
  859. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  860. if [ `dryrun` -gt "0" ]
  861. then
  862. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  863. rebaseline
  864. else
  865. echo "`date`: Dry run successful, OK to resend" >> $log
  866. fi
  867. fi
  868. else
  869. if [ ${DESTDELTA} -gt "0" ]
  870. then
  871. if [ ${SOURCEBASE} = ${DESTDELTA} ]
  872. then
  873. echo "`date`: No target .base found, but source .base matches target delta. Renaming target .delta to .base" >> $log
  874. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  875. errorcheck
  876. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  877. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  878. errorcheck
  879. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  880. if [ `dryrun` -gt "0" ]
  881. then
  882. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  883. rebaseline
  884. else
  885. echo "`date`: Dry run successful, OK to resend" >> $log
  886. fi
  887. else
  888. rebaseline
  889. fi
  890. else
  891. rebaseline
  892. fi
  893. fi
  894. fi
  895. else
  896. if [ ${SOURCEDELTA} -gt "0" ]
  897. then
  898. if [ ${DESTBASE} -gt "0" ]
  899. then
  900. if [ ${DESTDELTA} -gt "0" ]
  901. then
  902. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  903. then
  904. echo "`date`: Probably interrupted while destroying target .base(s)" >> $log
  905. ${SSH} -i ${REPID} replicator@$rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  906. errorcheck
  907. echo "`date`: Target .base snapshot(s) destroyed" >> $log
  908. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  909. errorcheck
  910. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  911. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  912. errorcheck
  913. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  914. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  915. errorcheck
  916. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  917. if [ `dryrun` -gt "0" ]
  918. then
  919. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  920. rebaseline
  921. else
  922. echo "`date`: Dry run successful, OK to resend" >> $log
  923. fi
  924. else
  925. rebaseline
  926. fi
  927. else
  928. if [ ${SOURCEDELTA} = ${DESTBASE} ]
  929. then
  930. echo "`date`: Probably interrupted while renaming source .delta to .base" >> $log
  931. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  932. errorcheck
  933. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  934. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  935. errorcheck
  936. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  937. if [ `dryrun` -gt "0" ]
  938. then
  939. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  940. rebaseline
  941. else
  942. echo "`date`: Dry run successful, OK to resend" >> $log
  943. fi
  944. else
  945. rebaseline
  946. fi
  947. fi
  948. else
  949. if [ ${DESTDELTA} -gt "0" ]
  950. then
  951. if [ ${SOURCEDELTA} = ${DESTDELTA} ]
  952. then
  953. echo "`date`: No source or target base, but source and target delta matches. Can try to rename them to base and test a resend"
  954. ${SSH} -i ${REPID} replicator@$rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}
  955. errorcheck
  956. echo "`date`: Target .delta snapshot(s) renamed .base" >> $log
  957. ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}
  958. errorcheck
  959. echo "`date`: Source .delta snapshot(s) renamed .base" >> $log
  960. ${SNAPSHOT} $lfs$r_new 2>> ${ERR}
  961. errorcheck
  962. echo "`date`: New source .delta snapshot(s) created, proceeding" >> $log
  963. if [ `dryrun` -gt "0" ]
  964. then
  965. echo "`date`: Dry run unsuccessful, rebaselining" >> $log
  966. rebaseline
  967. else
  968. echo "`date`: Dry run successful, OK to resend" >> $log
  969. fi
  970. else
  971. rebaseline
  972. fi
  973. else
  974. rebaseline
  975. fi
  976. fi
  977. else
  978. if [ ${DESTBASE} -gt "0" ]
  979. then
  980. if [ ${DESTDELTA} -gt "0" ]
  981. then
  982. rebaseline
  983. else
  984. rebaseline
  985. fi
  986. else
  987. if [ ${DESTDELTA} -gt "0" ]
  988. then
  989. rebaseline
  990. else
  991. rebaseline
  992. fi
  993. fi
  994. fi
  995. fi
  996.  
  997. # Replicate data:
  998. echo "sudo ${SEND} -i $lfs$r_old $lfs$r_new 2>> ${ERR} | ${SSH} $rh sudo zfs recv -du $rp/$lh 2>> ${ERR}" >> ${cmd}
  999. echo errorcheck >> ${cmd}
  1000. echo "echo \"\`date\`: Data replicated\" >> $log" >> ${cmd}
  1001.  
  1002. # Destroy target .base snapshot(s):
  1003. if [ "$r" = "yes" ]; then
  1004. cat >> ${cmd} << EOF
  1005. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)" -gt "0" ]; then
  1006. ${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  1007. errorcheck
  1008. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  1009. fi
  1010. EOF
  1011. else
  1012. cat >> ${cmd} << EOF
  1013. if [ "\$(${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}')" -gt "0" ]; then
  1014. ${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}
  1015. errorcheck
  1016. echo "\$(date): Target .base snapshot(s) destroyed" >> $log
  1017. fi
  1018. EOF
  1019. fi
  1020.  
  1021. # Rename target .delta snapshot(s) to .base:
  1022. echo "${SSH} $rh sudo ${RENAME} $tfs$r_new $tfs$r_old 2>> ${ERR}" >> ${cmd}
  1023. echo errorcheck >> ${cmd}
  1024. echo "echo \"\`date\`: Target .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  1025.  
  1026. # Destroy source .base snapshot(s):
  1027. echo "sudo ${DESTROY} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  1028. echo errorcheck >> ${cmd}
  1029. echo "echo \"\`date\`: Source .base snapshot(s) destroyed\" >> $log" >> ${cmd}
  1030.  
  1031. # Rename source .delta snapshot(s) to .base:
  1032. echo "sudo ${RENAME} $lfs$r_new $lfs$r_old 2>> ${ERR}" >> ${cmd}
  1033. echo errorcheck >> ${cmd}
  1034. echo "echo \"\`date\`: Source .delta snapshot(s) renamed .base\" >> $log" >> ${cmd}
  1035.  
  1036. echo "echo \"\`date\`: Remote incremental replication sequence finished on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1037. echo "echo \"\" >> $log" >> ${cmd}
  1038. echo "exit 0" >> ${cmd}
  1039. }
  1040.  
  1041. l_clean()
  1042. {
  1043. ##
  1044. ## Cleans locally replicated file system
  1045. ##
  1046.  
  1047. if zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  1048. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  1049. echo "" >> $log
  1050. rm ${pid}
  1051. exit 1
  1052. elif zpool status $lp | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  1053. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  1054. echo "" >> $log
  1055. rm ${pid}
  1056. exit 1
  1057. fi
  1058.  
  1059. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$lp\//"`
  1060.  
  1061. if [ "$r" = "yes" ]; then
  1062. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  1063. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  1064. DESTBASE=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base)
  1065. DESTDELTA=$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta)
  1066. SNAPSHOT="zfs snapshot -r"
  1067. SEND="zfs send -R"
  1068. DESTROY="zfs destroy -r"
  1069. RENAME="zfs rename -r"
  1070. else
  1071. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  1072. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  1073. DESTBASE=$(zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}')
  1074. DESTDELTA=$(zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}')
  1075. SNAPSHOT="zfs snapshot"
  1076. SEND="zfs send -p"
  1077. DESTROY="zfs destroy"
  1078. RENAME="zfs rename"
  1079. fi
  1080.  
  1081. echo "echo \"\`date\`: Beginning local cleaning process on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1082.  
  1083. if [ ${SOURCEBASE} -gt "0" ]; then
  1084. echo "sudo ${DESTROY} $lfs$l_old 2>> ${ERR}" >> ${cmd}
  1085. echo errorcheck >> ${cmd}
  1086. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  1087. fi
  1088. if [ ${SOURCEDELTA} -gt "0" ]; then
  1089. echo "sudo ${DESTROY} $lfs$l_new 2>> ${ERR}" >> ${cmd}
  1090. echo errorcheck >> ${cmd}
  1091. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  1092. fi
  1093. if [ ${DESTBASE} -gt "0" ]; then
  1094. echo "sudo ${DESTROY} $tfs$l_old 2>> ${ERR}" >> ${cmd}
  1095. echo errorcheck >> ${cmd}
  1096. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  1097. fi
  1098. if [ ${DESTDELTA} -gt "0" ]; then
  1099. echo "sudo ${DESTROY} $tfs$l_new 2>> ${ERR}" >> ${cmd}
  1100. echo errorcheck >> ${cmd}
  1101. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  1102. fi
  1103. if [ "$r" != "yes" ]; then
  1104. cat >> ${cmd} << EOF
  1105. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new" | awk 'END{print NR}')" -gt "0" ]; then
  1106. for SNAPSHOT in \$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$l_old|$l_new"); do
  1107. if [ "\$(zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  1108. sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  1109. errorcheck
  1110. fi
  1111. done
  1112. echo "\$(date): Unwanted local target snapshot(s) destroyed" >> $log
  1113. fi
  1114. EOF
  1115. fi
  1116. if [ $(zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}') -gt "0" ]; then
  1117. echo "sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  1118. echo errorcheck >> ${cmd}
  1119. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  1120. fi
  1121.  
  1122. echo "echo \"\`date\`: Local cleanup complete on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1123. echo "echo \"\" >> $log" >> ${cmd}
  1124. echo "" >> ${cmd}
  1125. }
  1126.  
  1127. r_clean()
  1128. {
  1129. ##
  1130. ## Cleans remotely replicated file systems
  1131. ##
  1132.  
  1133. SSH="ssh"
  1134.  
  1135. if [ "${compress}" = "yes" ]; then
  1136. SSH="ssh -C"
  1137. fi
  1138.  
  1139. if [ ! -z ${port} ]; then
  1140. SSH="${SSH} -p ${port}"
  1141. else
  1142. SSH="${SSH} -p 22"
  1143. fi
  1144.  
  1145. if [ `ping -c 1 -W 1000 $rh | grep -oe "[0-9] packets received" | awk '{print $1}'` -eq "0" ]; then
  1146. echo "`date`: Remote cleanup aborted on \"$lfs\"! No response from \"$rh\"." >> $log
  1147. echo "" >> $log
  1148. rm ${pid}
  1149. exit 1
  1150. elif zpool status `echo $lfs | cut -f1 -d /` | grep "scan:" | egrep -qo "(scrub in progress|resilver in progress)"; then
  1151. echo "`date`: A Scrub or Resilver is currently in progress on source pool, aborting." >> $log
  1152. echo "" >> $log
  1153. rm ${pid}
  1154. exit 1
  1155. elif su replicator -c "${SSH} $rh zpool status $rp | grep \"scan:\" | egrep -qo '(scrub in progress|resilver in progress)'"; then
  1156. echo "`date`: A Scrub or Resilver is currently in progress on destination pool, aborting." >> $log
  1157. echo "" >> $log
  1158. rm ${pid}
  1159. exit 1
  1160. fi
  1161.  
  1162. tfs=`echo $lfs | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//"`
  1163.  
  1164. if [ "$r" = "yes" ]; then
  1165. SOURCEBASE=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.base)
  1166. SOURCEDELTA=$(zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep -c replicate.delta)
  1167. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.base")
  1168. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c replicate.delta")
  1169. SNAPSHOT="zfs snapshot -r"
  1170. SEND="zfs send -R"
  1171. DESTROY="zfs destroy -r"
  1172. RENAME="zfs rename -r"
  1173. else
  1174. SOURCEBASE=$(zfs list -H -t snapshot -o name $lfs$r_old 2>/dev/null | awk 'END{print NR}')
  1175. SOURCEDELTA=$(zfs list -H -t snapshot -o name $lfs$r_new 2>/dev/null | awk 'END{print NR}')
  1176. DESTBASE=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_old 2>/dev/null | awk 'END{print NR}'")
  1177. DESTDELTA=$(su replicator -c "${SSH} $rh zfs list -H -t snapshot -o name $tfs$r_new 2>/dev/null | awk 'END{print NR}'")
  1178. SNAPSHOT="zfs snapshot"
  1179. SEND="zfs send -p"
  1180. DESTROY="zfs destroy"
  1181. RENAME="zfs rename"
  1182. fi
  1183.  
  1184. echo "echo \"\`date\`: Beginning remote cleaning process on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1185.  
  1186. if [ ${SOURCEBASE} -gt "0" ]; then
  1187. echo "sudo ${DESTROY} $lfs$r_old 2>> ${ERR}" >> ${cmd}
  1188. echo errorcheck >> ${cmd}
  1189. echo "echo \"\`date\`: Local base snapshot destroyed\" >> $log" >> ${cmd}
  1190. fi
  1191. if [ ${SOURCEDELTA} -gt "0" ]; then
  1192. echo "sudo ${DESTROY} $lfs$r_new 2>> ${ERR}" >> ${cmd}
  1193. echo errorcheck >> ${cmd}
  1194. echo "echo \"\`date\`: Local delta snapshot destroyed\" >> $log" >> ${cmd}
  1195. fi
  1196. if [ ${DESTBASE} -gt "0" ]; then
  1197. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_old 2>> ${ERR}" >> ${cmd}
  1198. echo errorcheck >> ${cmd}
  1199. echo "echo \"\`date\`: Target base snapshot destroyed\" >> $log" >> ${cmd}
  1200. fi
  1201. if [ ${DESTDELTA} -gt "0" ]; then
  1202. echo "${SSH} $rh sudo ${DESTROY} $tfs$r_new 2>> ${ERR}" >> ${cmd}
  1203. echo errorcheck >> ${cmd}
  1204. echo "echo \"\`date\`: Target delta snapshot destroyed\" >> $log" >> ${cmd}
  1205. fi
  1206. if [ "$r" != "yes" ]; then
  1207. cat >> ${cmd} << EOF
  1208. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new" | awk 'END{print NR}')" -gt "0" ]; then
  1209. for SNAPSHOT in \$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep "${tfs}@" | egrep -v "$r_old|$r_new"); do
  1210. if [ "\$(${SSH} $rh zfs list -H -r -t snapshot -o name $tfs 2>/dev/null | grep -c "\${SNAPSHOT}")" -gt "0" ]; then
  1211. ${SSH} $rh sudo zfs destroy \${SNAPSHOT} 2>> ${ERR}
  1212. errorcheck
  1213. fi
  1214. done
  1215. echo "\$(date): Unwanted remote target snapshot(s) destroyed" >> $log
  1216. fi
  1217. EOF
  1218. fi
  1219. if [ $(su replicator -c "${SSH} $rh zfs list -H -o name -r $tfs 2>/dev/null | awk 'END{print NR}'") -gt "0" ]; then
  1220. echo "${SSH} $rh sudo ${DESTROY} $tfs 2>> ${ERR}" >> ${cmd}
  1221. echo errorcheck >> ${cmd}
  1222. echo "echo \"\`date\`: Target filesystem destroyed\" >> $log" >> ${cmd}
  1223. fi
  1224.  
  1225. echo "echo \"\`date\`: Remote cleanup complete on \\\"$lfs\\\"\" >> ${log}" >> ${cmd}
  1226. echo "echo \"\" >> $log" >> ${cmd}
  1227. echo "" >> ${cmd}
  1228. }
  1229.  
  1230. errorcheck()
  1231. {
  1232. if [ "$?" -ne "0" ]; then
  1233. tail -2 ${ERR} >> $log
  1234. tail -10 $log > ${mail}
  1235. mail -s "$subject" $address < ${mail}
  1236. cat /dev/null > ${mail}
  1237. exit 1
  1238. fi
  1239. }
  1240.  
  1241. usenonecipher()
  1242. {
  1243. sed -i '' -e "s/${SSH} $rh sudo zfs recv/${SSH} -oNoneEnabled=yes -oNoneSwitch=yes $rh sudo zfs recv/" ${cmd}
  1244. }
  1245.  
  1246. sanity()
  1247. {
  1248. if [ -e $pid ]; then
  1249. CMD_BASE=$(echo ${cmd} | sed -E 's/\.[[:alnum:]]*$/\./')
  1250.  
  1251. # Since replicate has fired again (that is why it is running now), it has
  1252. # created a new job for _this_ run. So when searching for latest job, we
  1253. # scroll back two steps, instead of just one, which we already know is going
  1254. # to be empty any way:
  1255.  
  1256. LAST_RUN_JOB=$(ls -lU -D %y-%m-%d_%H-%M-%S ${CMD_BASE}* | egrep -o "([0-9]|\-|\_){17}.*$" | sort -n -t '_' -k 1,8 | tail -2 | head -1 | cut -d ' ' -f 2)
  1257. COMMANDS_IN_JOB=$(sed -E -e 's/ \$\{.*\}//' -e 's/^\ {4}if \[ "\$\(//' -e 's# 2>/dev/null.*$##' ${LAST_RUN_JOB} | egrep -v '^(echo|errorcheck|exit|if|fi|for|done|^#|^$)|^\ (\{|\})|^\ {2}(echo|errorcheck|if|fi|for|done)|^\ {4}(tail|mail|cat|exit|errorcheck|if|fi)|^\ {6}errorcheck' | tr '|' '\n' | sed -E -e 's/^\ *//' -e 's/ 2>>.*$//' | tr '\n' '|' | sed 's/|$//')
  1258. if [ $(echo "${COMMANDS_IN_JOB}" | wc -w) -le 0 ]; then
  1259. for JOB in $(ls -lU -D %y-%m-%d_%H-%M-%S ${CMD_BASE}* | egrep -o "([0-9]|\-|\_){17}.*$" | sort -r -n -t '_' -k 1,8 | cut -d ' ' -f 2); do
  1260. COMMANDS_IN_JOB=$(sed -E -e 's/ \$\{.*\}//' -e 's/^\ {4}if \[ "\$\(//' -e 's# 2>/dev/null.*$##' ${JOB} | egrep -v '^(echo|errorcheck|exit|if|fi|for|done|^#|^$)|^\ (\{|\})|^\ {2}(echo|errorcheck|if|fi|for|done)|^\ {4}(tail|mail|cat|exit|errorcheck|if|fi)|^\ {6}errorcheck' | tr '|' '\n' | sed -E -e 's/^\ *//' -e 's/ 2>>.*$//' | tr '\n' '|' | sed 's/|$//')
  1261. if [ $(echo "${COMMANDS_IN_JOB}" | wc -w) -ne 0 ]; then
  1262. LAST_RUN_JOB=${JOB}
  1263. break
  1264. else
  1265. LAST_RUN_JOB=""
  1266. fi
  1267. done
  1268. fi
  1269. replicate_cleanup() {
  1270. REPLICATE_PID_LOOP=$(printf "${REPLICATE_PROCESSES}" | awk '{print$1}')
  1271. for REPLICATE_PID in ${REPLICATE_PID_LOOP}; do
  1272. USED_CPU=$(ps -a -x -o pid,%cpu,command | grep "${REPLICATE_PID}" | grep -v grep | awk '{sub(/\./,"");print$2}')
  1273. if [ $(echo "${USED_CPU}" | wc -w) -ne 0 ]; then
  1274.  
  1275. # OK, so there is at least a process there:
  1276.  
  1277. if [ $(printf "${USED_CPU}" | wc -l) -gt 1 ]; then
  1278. USED_CPU=$(printf "${USED_CPU}" | awk '{if($1>a)a=$1};{if(a=="")a="00"};END{print a}')
  1279. fi
  1280. else
  1281. USED_CPU="00"
  1282. fi
  1283. if [ "${USED_CPU}" = "00" ]; then
  1284. USED_CPU="0"
  1285. else
  1286. USED_CPU=$(echo "${USED_CPU}" | sed 's/0//g')
  1287. fi
  1288. if [ ${USED_CPU} -ne 0 ]; then
  1289. RUNNING_PROCESSES="${RUNNING_PROCESSES} ${REPLICATE_PID}"
  1290. else
  1291. UNKNOWN_STATUS_PROCESSES="${UNKNOWN_STATUS_PROCESSES} ${REPLICATE_PID}"
  1292. fi
  1293. done
  1294. if [ $(echo "${RUNNING_PROCESSES}" | wc -w) -ne 0 ]; then
  1295. echo "$(date): Started a new process, but previous \"replicate\" processes are still running, aborting." >> $log
  1296. echo "" >> $log
  1297. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1298. else
  1299. if [ $(echo "${UNKNOWN_STATUS_PROCESSES}" | wc -w) -ne 0 ]; then
  1300. for UNKNOWN_STATUS_PROCESS in ${UNKNOWN_STATUS_PROCESSES}; do
  1301. if [ $(ps ax | grep "${UNKNOWN_STATUS_PROCESS}" | grep -v grep | egrep -c ' D.* ') -ne 0 ]; then
  1302. ERROR_MESSAGE="The previous replicate command set has been found with no running processes and seems to be stuck. A process from the last run has been found in \"D\" state which can not be killed, reboot required"
  1303. echo "${ERROR_MESSAGE}" >> ${mail}
  1304. echo "$(ps ax | grep "${UNKNOWN_STATUS_PROCESS}" | grep -v grep | egrep ' D.* ')" >> ${mail}
  1305. mail -s "$subject" $address < ${mail}
  1306. echo "$(date): ${ERROR_MESSAGE}" >> $log
  1307. echo "" >> $log
  1308. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1309. else
  1310. kill ${UNKNOWN_STATUS_PROCESS}
  1311. echo "$(date): No process from previous replicate run found running. Killed idle process \"${UNKNOWN_STATUS_PROCESS}\" as part of the cleanup before trying to start again." >> $log
  1312. echo "" >> $log
  1313. fi
  1314. done
  1315. else
  1316. echo "$(date): No idle or running processes from previous replicate run found running." >> $log
  1317. echo "" >> $log
  1318. fi
  1319. fi
  1320. }
  1321. confirm_activity() {
  1322.  
  1323. while [ "$#" -gt "0" ]; do
  1324. case "$1" in
  1325. "-r")
  1326. MODE="$1"
  1327. ;;
  1328. "-d")
  1329. DESTROYING="$1"
  1330. ;;
  1331. *)
  1332. echo "Unknown argument '$1'" >&2
  1333. ;;
  1334. esac
  1335. shift
  1336. done
  1337.  
  1338. if [ "${MODE}" = "-r" ]; then
  1339. if [ "${DESTROYING}" = "-d" ]; then
  1340. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | egrep 'zfs destroy|zfs list' | awk '{sub(/\./,"");print$2}')
  1341. else
  1342. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | grep 'zfs recv' | awk '{sub(/\./,"");print$2}')
  1343. fi
  1344. else
  1345. if [ "${DESTROYING}" = "-d" ]; then
  1346. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | egrep 'zfs destroy|zfs list' | awk '{sub(/\./,"");print$2}')
  1347. else
  1348. USED_CPU=$(printf "${REPLICATE_PROCESSES}" | grep 'zfs recv' | awk '{sub(/\./,"");print$2}')
  1349. fi
  1350. fi
  1351. if [ $(echo "${USED_CPU}" | wc -w) -ne 0 ]; then
  1352.  
  1353. # OK, so there is at least a process there:
  1354.  
  1355. if [ $(printf "${USED_CPU}" | wc -l) -gt 1 ]; then
  1356. USED_CPU=$(printf "${USED_CPU}" | awk '{if($1>a)a=$1};{if(a=="")a="00"};END{print a}')
  1357. fi
  1358. else
  1359. USED_CPU="00"
  1360. fi
  1361. if [ ${USED_CPU} = "00" ]; then
  1362. USED_CPU="0"
  1363. else
  1364. USED_CPU=$(echo "${USED_CPU}" | sed 's/0//g')
  1365. fi
  1366. if [ ${USED_CPU} -ne 0 ]; then
  1367.  
  1368. # Found running process:
  1369.  
  1370. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1371. echo "" >> $log
  1372. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1373. else
  1374. if [ "${MODE}" = "-r" ]; then
  1375.  
  1376. # Found 'zfs recv' process but considered idle. Need to check if
  1377. # still active on receiving end.
  1378.  
  1379. SSH="ssh"
  1380. if [ ! -z ${port} ]; then
  1381. SSH="${SSH} -p ${port}"
  1382. else
  1383. SSH="${SSH} -p 22"
  1384. fi
  1385. REPID="$(awk -F':' '{print $6}' /etc/passwd | grep replicator)/.ssh/id_rsa"
  1386. REMOTE_REPLICATE_COMMANDS=$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep 'ssh' | sed -E -e "s/${SSH} ${rh}//" -e 's/.* sudo//' -e 's/^ //' | tr '\n' '|' | sed -E "s/\|$//")
  1387. REMOTE_REPLICATE_PROCESSES=$(${SSH} -i ${REPID} replicator@$rh ps -a -x -o pid,%cpu,command | egrep "zfs send -vRi|zfs recv -vdn|${REMOTE_REPLICATE_COMMANDS}" | egrep -v 'egrep| sudo ')
  1388. if [ "${DESTROYING}" = "-d" ]; then
  1389. USED_CPU=$(printf "${REMOTE_REPLICATE_PROCESSES}" | egrep 'zfs destroy|zfs list' | awk '{sub(/\./,"");print$2}')
  1390. else
  1391. USED_CPU=$(printf "${REMOTE_REPLICATE_PROCESSES}" | grep 'zfs recv' | awk '{sub(/\./,"");print$2}')
  1392. fi
  1393. if [ $(echo "${USED_CPU}" | wc -w) -ne 0 ]; then
  1394.  
  1395. # OK, so there is at least a process there:
  1396.  
  1397. if [ $(printf "${USED_CPU}" | wc -l) -gt 1 ]; then
  1398. USED_CPU=$(printf "${USED_CPU}" | awk '{if($1>a)a=$1};{if(a=="")a="00"};END{print a}')
  1399. fi
  1400. else
  1401. USED_CPU="00"
  1402. fi
  1403. if [ ${USED_CPU} = "00" ]; then
  1404. USED_CPU="0"
  1405. else
  1406. USED_CPU=$(echo "${USED_CPU}" | sed 's/0//g')
  1407. fi
  1408. if [ ${USED_CPU} -ne 0 ]; then
  1409.  
  1410. # Found running process:
  1411.  
  1412. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1413. echo "" >> $log
  1414. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1415. else
  1416.  
  1417. # But for crying out loud, it isn't using any CPU time either?
  1418. # OK, what is left to check then, is the size changing?
  1419.  
  1420. LAST_RUN_FILESYSTEM=$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep 'zfs send' | awk '{print $NF}' | cut -d '@' -f 1)
  1421. TARGET_FILESYSTEM=$(echo ${LAST_RUN_FILESYSTEM} | cut -f 2-512 -d / | sed "s/^/$rp\/$lh\//")
  1422. check_target_size() {
  1423. ${SSH} -i ${REPID} replicator@$rh zfs get -H -p -o value used ${TARGET_FILESYSTEM}
  1424. }
  1425. SIZE_BEFORE=$(check_target_size)
  1426. sleep 120
  1427. SIZE_AFTER=$(check_target_size)
  1428. if [ "${SIZE_BEFORE}" -ne "${SIZE_AFTER}" ]; then
  1429.  
  1430. # Finally, a hit! Then it should at least be doing something.
  1431.  
  1432. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1433. echo "" >> $log
  1434. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1435. else
  1436. if [ "${DESTROYING}" = "-d" ]; then
  1437.  
  1438. # Hmm, so size hasn't changed on the filesystem either, jeez... OK, last check; is the number of snapshots changing?
  1439.  
  1440. check_number_of_snapshots() {
  1441. ${SSH} -i ${REPID} replicator@$rh zfs list -H -t snapshot -o name -r ${TARGET_FILESYSTEM} 2>/dev/null | awk 'END{print NR}'
  1442. }
  1443. SNAPSHOTS_NUMBER_BEFORE=$(check_number_of_snapshots)
  1444. sleep 120
  1445. SNAPSHOTS_NUMBER_AFTER=$(check_number_of_snapshots)
  1446. if [ "${SNAPSHOTS_NUMBER_BEFORE}" -gt "${SNAPSHOTS_NUMBER_AFTER}" ]; then
  1447.  
  1448. # Epic win! So it's probably removing snapshots, good grief what a hassle:)
  1449.  
  1450. echo "$(date): Started a new process, but the previous remote \"replicate\" is still running, aborting." >> $log
  1451. echo "" >> $log
  1452. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1453. else
  1454. replicate_cleanup
  1455. fi
  1456. else
  1457. replicate_cleanup
  1458. fi
  1459. fi
  1460. fi
  1461. else
  1462.  
  1463. # But for crying out loud, it isn't using any CPU time either?
  1464. # OK, what is left to check then, is the size changing?
  1465.  
  1466. LAST_RUN_FILESYSTEM=$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep 'zfs send' | awk '{print $NF}' | cut -d '@' -f 1)
  1467. TARGET_FILESYSTEM=$(echo ${LAST_RUN_FILESYSTEM} | cut -f 2-512 -d / | sed "s/^/$lp\//")
  1468. check_target_size() {
  1469. zfs get -H -p -o value used ${TARGET_FILESYSTEM}
  1470. }
  1471. SIZE_BEFORE=$(check_target_size)
  1472. sleep 120
  1473. SIZE_AFTER=$(check_target_size)
  1474. if [ "${SIZE_BEFORE}" -ne "${SIZE_AFTER}" ]; then
  1475.  
  1476. # Finally, a hit! Then it should at least be doing something.
  1477.  
  1478. echo "$(date): Started a new process, but the previous local \"replicate\" is still running, aborting." >> $log
  1479. echo "" >> $log
  1480. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1481. else
  1482. if [ "${DESTROYING}" = "-d" ]; then
  1483.  
  1484. # Hmm, so size hasn't changed on the filesystem either, jeez... OK, last check; is the number of snapshots changing?
  1485.  
  1486. check_number_of_snapshots() {
  1487. zfs list -H -t snapshot -o name -r ${TARGET_FILESYSTEM} 2>/dev/null | awk 'END{print NR}'
  1488. }
  1489. SNAPSHOTS_NUMBER_BEFORE=$(check_number_of_snapshots)
  1490. sleep 120
  1491. SNAPSHOTS_NUMBER_AFTER=$(check_number_of_snapshots)
  1492. if [ "${SNAPSHOTS_NUMBER_BEFORE}" -gt "${SNAPSHOTS_NUMBER_AFTER}" ]; then
  1493.  
  1494. # Epic win! So it's probably removing snapshots, good grief what a hassle:)
  1495.  
  1496. echo "$(date): Started a new process, but the previous local \"replicate\" is still running, aborting." >> $log
  1497. echo "" >> $log
  1498. rm ${ERR}; rm ${cmd}; rm ${mail}; exit 1
  1499. else
  1500. replicate_cleanup
  1501. fi
  1502. else
  1503. replicate_cleanup
  1504. fi
  1505. fi
  1506. fi
  1507. fi
  1508. }
  1509. confirm_active_recv() {
  1510. if [ ! -z ${rh} ]; then
  1511.  
  1512. # Means it is a remote replication job:
  1513.  
  1514. if [ $(printf "${REPLICATE_PROCESSES}" | grep -c 'ssh') -ne 0 ]; then
  1515.  
  1516. # Found active 'ssh' transfer:
  1517.  
  1518. if [ $(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | grep -c 'zfs recv') -ne 0 ]; then
  1519.  
  1520. # Found active 'zfs recv':
  1521.  
  1522. confirm_activity -r
  1523. else
  1524.  
  1525. # No active 'zfs recv' found running. Is it destroying?
  1526.  
  1527. if [ $(printf "${REPLICATE_PROCESSES}" | grep 'ssh' | egrep -c 'zfs destroy|zfs list') -ne 0 ]; then
  1528.  
  1529. # It's supposed to be destroying snapshots at the moment. Let's make
  1530. # sure:
  1531.  
  1532. confirm_activity -r -d
  1533. else
  1534. replicate_cleanup
  1535. fi
  1536. fi
  1537. else
  1538.  
  1539. # No active 'ssh' transfer found running when set as remote:
  1540.  
  1541. replicate_cleanup
  1542. fi
  1543. else
  1544.  
  1545. # If not remote, then it is a local replication job:
  1546.  
  1547. if [ $(printf "${REPLICATE_PROCESSES}" | grep -c 'zfs recv') -ne 0 ]; then
  1548.  
  1549. # Found active 'zfs recv':
  1550.  
  1551. confirm_activity
  1552. else
  1553.  
  1554. # No active 'zfs recv' found running. Is it destroying?
  1555.  
  1556. if [ $(printf "${REPLICATE_PROCESSES}" | egrep -c 'zfs destroy|zfs list') -ne 0 ]; then
  1557.  
  1558. # It's supposed to be destroying snapshots at the moment. Let's make
  1559. # sure:
  1560.  
  1561. confirm_activity -d
  1562. else
  1563. replicate_cleanup
  1564. fi
  1565. fi
  1566. fi
  1567. }
  1568. confirm_replicate_status() {
  1569. if [ $(printf "${REPLICATE_PROCESSES}" | wc -w) -ne 0 ]; then
  1570. if [ $(printf "${REPLICATE_PROCESSES}" | egrep -c 'zfs send|zfs recv|zfs destroy|zfs list') -ne 0 ]; then
  1571. if [ $(printf "${REPLICATE_PROCESSES}" | grep -c 'zfs send') -ne 0 ]; then
  1572.  
  1573. # Found active 'zfs send':
  1574.  
  1575. confirm_active_recv
  1576. else
  1577.  
  1578. # No active 'zfs send' found running:
  1579.  
  1580. confirm_active_recv
  1581. fi
  1582. else
  1583.  
  1584. # No active 'zfs send' or 'zfs recv' found running. If remote, it
  1585. # might still be working on the receiving end.
  1586.  
  1587. if [ "$(printf "${COMMANDS_IN_JOB}" | tr '|' '\n' | grep -c 'ssh')" -ne "0" ]; then
  1588. confirm_activity -r -d
  1589. else
  1590. replicate_cleanup
  1591. fi
  1592. fi
  1593. else
  1594.  
  1595. # No processes left running. Starting new job.
  1596.  
  1597. :
  1598. fi
  1599. }
  1600. if [ $(echo ${LAST_RUN_JOB} | wc -w) -ne 0 ]; then
  1601. if [ $(ps ax | egrep "${LAST_RUN_JOB}" | grep -v egrep -c) -ne 0 ]; then
  1602. REPLICATE_PROCESSES=$(ps -a -x -o pid,%cpu,command | egrep "zfs send -vRi|zfs recv -vdn|${LAST_RUN_JOB}|${COMMANDS_IN_JOB}" | grep -v egrep)
  1603. confirm_replicate_status
  1604. else
  1605.  
  1606. # The last known job to have commands is not running any more:
  1607.  
  1608. replicate_cleanup
  1609. fi
  1610. else
  1611.  
  1612. # No previous job found with valid commands. Retrying with broadened
  1613. # search criteria:
  1614.  
  1615. REPLICATE_PROCESSES=$(ps -a -x -o pid,%cpu,command | egrep 'zfs send|zfs recv|zfs snapshot|zfs rename|zfs destroy|zfs list' | egrep -v 'egrep|zfSnap')
  1616. confirm_replicate_status
  1617. fi
  1618. fi
  1619. chown replicator ${mail}; chmod 600 ${mail}
  1620. chown replicator ${ERR}; chmod 600 ${ERR}
  1621. chown replicator ${cmd}; chmod 700 ${cmd}
  1622. cat > ${cmd} << EOF
  1623. #!/bin/sh
  1624. errorcheck()
  1625. {
  1626. if [ "\$?" -ne "0" ]; then
  1627. tail -2 ${ERR} >> $log
  1628. tail -10 $log > ${mail}
  1629. mail -s "$subject" $address < ${mail}
  1630. cat /dev/null > ${mail}
  1631. exit 1
  1632. fi
  1633. }
  1634.  
  1635. EOF
  1636. }
  1637.  
  1638. autoclean()
  1639. {
  1640. ##
  1641. ## Find and destroy all automatically taken snapshots
  1642. ##
  1643.  
  1644. zfs list -t snapshot -o name | grep auto > ${cmd}
  1645. cat ${cmd}
  1646. sed -i '' 's/^/sudo zfs destroy /' ${cmd}
  1647. }
  1648.  
  1649. allow()
  1650. {
  1651. sudoers="/usr/local/etc/sudoers"
  1652.  
  1653. if [ `grep "ssh" ${cmd} | wc -l` -gt "0" ]
  1654. then
  1655. allows=`grep zfs ${cmd} | cut -f 1 -d ">" | tr '|' '\n' | sed -e 's/^ //' -e 's/ 2$//' | sed -e 's/sudo //' -e 's/^ //' -e 's/^/replicator ALL=(ALL) NOPASSWD: \/sbin\//' | grep -v ssh`
  1656. else
  1657. allows=`grep zfs ${cmd} | cut -f 1 -d ">" | tr '|' '\n' | sed -e 's/^ //' -e 's/ 2$//' | sed -e 's/sudo //' -e 's/^ //' -e 's/^/replicator ALL=(ALL) NOPASSWD: \/sbin\//'`
  1658. fi
  1659. if [ `grep "replicator" $sudoers | wc -l` -gt "0" ]; then
  1660. sed -i '' '/replicator/d' $sudoers
  1661. fi
  1662. echo "$allows" >> $sudoers
  1663. }
  1664.  
  1665. disallow()
  1666. {
  1667. if [ `grep "replicator" $sudoers | wc -l` -gt "0" ]; then
  1668. sed -i '' '/replicator/d' $sudoers
  1669. fi
  1670. }
  1671.  
  1672. ##
  1673. ## Initial preparation
  1674. ##
  1675.  
  1676. case "$1" in
  1677.  
  1678. "")
  1679. ##
  1680. ## If no jobname is specified, it will find all jobs in config directory and
  1681. ## process them one by one
  1682. ##
  1683.  
  1684. for job in $jobs
  1685. do
  1686. . $job
  1687. sanity
  1688. touch $pid
  1689. if [ -z "$rh" ]
  1690. then
  1691. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @local_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1692. then
  1693. l_inc
  1694. else
  1695. l_base
  1696. fi
  1697. else
  1698. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @remote_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1699. then
  1700. r_inc
  1701. if [ "$none" = "yes" ]; then
  1702. usenonecipher
  1703. fi
  1704. else
  1705. r_base
  1706. if [ "$none" = "yes" ]; then
  1707. usenonecipher
  1708. fi
  1709. fi
  1710. fi
  1711. allow
  1712. su replicator -c ${cmd}
  1713. disallow
  1714. rm $pid
  1715. done
  1716. ;;
  1717.  
  1718. -c)
  1719. ##
  1720. ## When cleaning, if no jobname is specified, it will find all jobs in
  1721. ## config directory and process them one by one
  1722. ##
  1723.  
  1724. if [ "$2" = "auto" ]
  1725. then
  1726. else
  1727. echo ""
  1728. echo "Starting off by deleting replication snapshot(s) and cleaning the entire"
  1729. echo "target pool zfs filesystem. This can take a while, depending on how much"
  1730. echo "is stored."
  1731. sleep 10
  1732. fi
  1733.  
  1734. if [ "$2" = "" ]; then
  1735. for job in `find $path -type f ! -iname sample_* ! -iname noauto_* | sort -rd`
  1736. do
  1737. . $job
  1738. sanity
  1739. touch $pid
  1740. if [ -z "$rh" ]
  1741. then
  1742. l_clean
  1743. else
  1744. r_clean
  1745. fi
  1746. allow
  1747. su replicator -c ${cmd}
  1748. disallow
  1749. rm $pid
  1750. done
  1751. exit
  1752. fi
  1753.  
  1754. ##
  1755. ## Also when cleaning, if you type in shorthand jobname, it will only clean
  1756. ## specified job
  1757. ##
  1758.  
  1759. if [ -e ${path}$2 ]; then
  1760. . $path$2
  1761. sanity
  1762. touch $pid
  1763. if [ -z "$rh" ]
  1764. then
  1765. l_clean
  1766. else
  1767. r_clean
  1768. fi
  1769. allow
  1770. su replicator -c ${cmd}
  1771. disallow
  1772. rm $pid
  1773. else
  1774.  
  1775. ##
  1776. ## If you type "auto" when cleaning, it finds and destroys all snapshots with
  1777. ## the word "auto" in them.
  1778. ##
  1779.  
  1780. if [ "$2" = "auto" ]
  1781. then
  1782. sanity
  1783. touch $pid
  1784. autoclean
  1785. while true
  1786. do
  1787. read -r -p 'These snapshot(s) will destroyed. Confirm? "Y|n"' choice
  1788. case "$choice" in
  1789. y|Y) allow
  1790. su replicator -c ${cmd}
  1791. disallow
  1792. rm $pid
  1793. check=`zfs list -t snapshot`
  1794. result="no datasets available"
  1795. if [ "$check" = "$result" ]; then
  1796. echo "Seems clean enough"
  1797. else
  1798. echo "Seems there are snaps left:"
  1799. zfs list -t snapshot
  1800. fi; break ;;
  1801. n|N) echo "Suit yourself..." ; break ;;
  1802. *) echo "Suit yourself..." ; break ;;
  1803. esac
  1804. done
  1805. else
  1806. echo ""
  1807. echo "### Error! ###"
  1808. echo "Jobname \"$2\" does not exist"
  1809. usage
  1810. fi
  1811. fi
  1812. ;;
  1813.  
  1814. -h)
  1815. usage
  1816. ;;
  1817.  
  1818. *)
  1819. ##
  1820. ## If you type in shorthand jobname, it will only process specified job
  1821. ##
  1822.  
  1823. if [ -e ${path}$1 ]
  1824. then
  1825. . $path$1
  1826. sanity
  1827. touch $pid
  1828. if [ -z "$rh" ]
  1829. then
  1830. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @local_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1831. then
  1832. l_inc
  1833. else
  1834. l_base
  1835. fi
  1836. else
  1837. if [ `zfs list -H -r -t snapshot -o name $lfs 2>/dev/null | grep @remote_replicate | wc -l | sed 's/^[ \t]*//'` -gt "0" ]
  1838. then
  1839. r_inc
  1840. if [ "$none" = "yes" ]; then
  1841. usenonecipher
  1842. fi
  1843. else
  1844. r_base
  1845. if [ "$none" = "yes" ]; then
  1846. usenonecipher
  1847. fi
  1848. fi
  1849. fi
  1850. allow
  1851. su replicator -c ${cmd}
  1852. disallow
  1853. rm $pid
  1854. else
  1855. echo ""
  1856. echo "### Error! ###"
  1857. echo "Jobname \"$1\" does not exist"
  1858. usage
  1859. fi
  1860. ;;
  1861.  
  1862. esac
  1863.  
  1864. ##
  1865. ## Final cleanup
  1866. ##
  1867.  
  1868. rm -f /tmp/replicate.*
Advertisement
Add Comment
Please, Sign In to add comment