Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env bash
#
# Brownout monitor configuration.
# Every setting below can be overridden from the environment; the value after
# ':-' is the default used when the variable is unset or empty.
set -euo pipefail

# Seconds to sleep between polls of the throttled flags.
INTERVAL_SEC="${INTERVAL_SEC:-2}"
# Minimum seconds between snapshots.
SNAPSHOT_COOLDOWN_SEC="${SNAPSHOT_COOLDOWN_SEC:-30}"
# Minimum seconds between logs for the same tag.
PER_TAG_COOLDOWN_SEC="${PER_TAG_COOLDOWN_SEC:-60}"

# The cooldowns are used in arithmetic comparisons, so reject anything that is
# not a plain non-negative integer before it can cause an arithmetic error.
for _name in SNAPSHOT_COOLDOWN_SEC PER_TAG_COOLDOWN_SEC; do
  if [[ ! "${!_name}" =~ ^[0-9]+$ ]]; then
    printf '%s must be a non-negative integer, got: %s\n' "$_name" "${!_name}" >&2
    exit 2
  fi
done
unset _name

# Network interface inspected in snapshots.
ETH_IF="${ETH_IF:-eth0}"
# Log directory, log file, persisted throttle-mask state, and lock file.
LOGDIR="${LOGDIR:-/home/pi/e}"
LOGFILE="${LOGFILE:-$LOGDIR/brownout.log}"
STATEFILE="${STATEFILE:-$LOGDIR/brownout.state}"
LOCKFILE="${LOCKFILE:-/run/fr201-brownout-monitor.lock}"

# Broad kernel patterns that may indicate (or result from) brownouts
# (not limited to USB/Ethernet). Extended regex, matched case-insensitively.
KERNEL_PAT='under[- ]voltage|over[- ]current|brownout|throttl|watchdog|hung task|soft lockup|hard lockup|kernel panic|oops|BUG:|segfault|rcu_sched|mmc|sdhci|mmcblk|I/O error|Buffer I/O error|blk_update_request|EXT4-fs error|XFS.*error|FAT-fs|exFAT|BTRFS.*error|nvme.*reset|pcie.*AER|usb .*disconnect|usb .*reset|xhci|dwc2|uas|link down|link up|NETDEV WATCHDOG'
# Print the current local time as an ISO-8601 timestamp with seconds precision.
ts() {
  date -Iseconds
}
# Print the current time as whole seconds since the Unix epoch.
epoch() {
  # -1 asks the printf builtin for "now".
  printf '%(%s)T\n' -1
}
# Succeed (status 0) when $1 resolves to something the shell can run; silent.
have() {
  command -v "$1" &>/dev/null
}
# Append one line to $LOGFILE: the ts() timestamp, a space, then all arguments
# joined into a single message.
log() {
  local message="$*"
  printf '%s %s\n' "$(ts)" "$message" >>"$LOGFILE"
}
# Print the raw throttled value reported by the firmware.
# Tries vcgencmd first and falls back to the sysfs node when vcgencmd is
# missing, fails, or produces no value.
# Globals:   THROTTLED_SYSFS (read, optional) - overrides the sysfs node path
# Outputs:   the value with CR/LF stripped on stdout, or nothing if unavailable
# Returns:   always 0
get_throttled_raw() {
  local sysfs_node="${THROTTLED_SYSFS:-/sys/devices/platform/soc/soc:firmware/get_throttled}"
  local value=""

  if have vcgencmd; then
    # Keep only what follows the first '=' of vcgencmd's output.
    value="$(vcgencmd get_throttled 2>/dev/null | awk -F= '{print $2}' | tr -d '\r\n')" \
      || value=""
  fi

  # An installed-but-failing vcgencmd must not hide a readable sysfs node.
  if [[ -z "$value" && -r "$sysfs_node" ]]; then
    value="$(tr -d '\r\n' <"$sysfs_node" 2>/dev/null)" || value=""
  fi

  printf '%s' "$value"
  return 0
}
# Convert a hexadecimal string to decimal.
# Arguments: $1 - hex digits, optionally prefixed with 0x or 0X; may be empty
# Outputs:   the decimal value on stdout (0 for empty input)
# Returns:   0 on success; 1 (with a message on stderr, nothing on stdout)
#            when $1 is not a hexadecimal number
hex_to_int() {
  local digits="${1:-}"
  digits="${digits#0[xX]}"

  if [[ -z "$digits" ]]; then
    echo 0
    return 0
  fi

  # Validate before the arithmetic expansion: arbitrary text there would be
  # evaluated as an arithmetic expression instead of being rejected cleanly.
  if [[ ! "$digits" =~ ^[0-9A-Fa-f]+$ ]]; then
    printf 'hex_to_int: not a hexadecimal value: %s\n' "${1:-}" >&2
    return 1
  fi

  echo "$((16#$digits))"
}
# Translate a throttled bitmask into space-separated labels.
# Arguments: $1 - the mask as an integer
# Outputs:   one label per set bit (low bits first), or "OK" when none of the
#            known bits is set; no trailing newline
describe_mask() {
  local m="$1"
  # bit:label pairs, in output order.
  local -a table=(
    "0x1:NOW:undervoltage"
    "0x2:NOW:freq_capped"
    "0x4:NOW:throttling"
    "0x8:NOW:temp_limit"
    "0x10000:SINCE:undervoltage"
    "0x20000:SINCE:freq_capped"
    "0x40000:SINCE:throttling"
    "0x80000:SINCE:temp_limit"
  )
  local -a labels=()
  local entry

  for entry in "${table[@]}"; do
    # Text before the first ':' is the bit, the remainder is the label.
    if (( m & ${entry%%:*} )); then
      labels+=("${entry#*:}")
    fi
  done

  if (( ${#labels[@]} == 0 )); then
    labels=("OK")
  fi
  printf '%s' "${labels[*]}"
}
# Append a device/state snapshot to $LOGFILE, rate-limited by a cooldown.
# Arguments: $1 - free-text reason recorded in the begin/skip log lines
# Globals:   LAST_SNAPSHOT_EPOCH (read/written), SNAPSHOT_COOLDOWN_SEC,
#            ETH_IF, KERNEL_PAT, LOGFILE (read)
# Returns:   0, also when the snapshot is skipped because of the cooldown
# Every probe is best effort: a missing or failing tool leaves its section
# short or empty instead of aborting the monitor.
snapshot_devices() {
  local reason="$1"
  local now_e
  now_e="$(epoch)"

  if (( now_e - ${LAST_SNAPSHOT_EPOCH:-0} < SNAPSHOT_COOLDOWN_SEC )); then
    log "SNAPSHOT skipped (cooldown) reason=$reason"
    return 0
  fi
  LAST_SNAPSHOT_EPOCH="$now_e"

  log "SNAPSHOT begin reason=$reason"

  if have vcgencmd; then
    {
      vcgencmd get_throttled || true
      vcgencmd measure_volts core || true
      vcgencmd measure_clock arm || true
      vcgencmd measure_temp || true
    } | sed 's/^/ /' >>"$LOGFILE"
  fi

  {
    echo " [net]"
    ip -br link 2>/dev/null || true
    if have ethtool && ip link show "$ETH_IF" >/dev/null 2>&1; then
      ethtool "$ETH_IF" 2>/dev/null \
        | grep -E -i 'Speed:|Duplex:|Auto-negotiation:|Link detected:' || true
    fi
  } >>"$LOGFILE"

  {
    echo " [usb]"
    if have lsusb; then
      lsusb -t 2>/dev/null || true
    fi
  } >>"$LOGFILE"

  {
    echo " [block]"
    # An lsblk that does not know the MOUNTPOINTS column rejects the whole
    # request, so retry with the singular MOUNTPOINT column before giving up.
    lsblk -o NAME,MODEL,SIZE,FSTYPE,TYPE,TRAN,MOUNTPOINTS 2>/dev/null \
      || lsblk -o NAME,MODEL,SIZE,FSTYPE,TYPE,TRAN,MOUNTPOINT 2>/dev/null \
      || true
  } >>"$LOGFILE"

  # Small kernel window for context, *only* around the event time.
  if have journalctl; then
    local win_start win_end
    win_start="$((now_e - 15))"
    win_end="$((now_e + 2))"
    {
      echo " [kernel window]"
      journalctl -k --since "@$win_start" --until "@$win_end" -o short-iso 2>/dev/null \
        | grep -E -i -- "$KERNEL_PAT" || true
    } >>"$LOGFILE"
  fi

  log "SNAPSHOT end"
}
# Classify a kernel log line into a coarse category tag.
# Arguments: $1 - the log line
# Outputs:   one of POWER, MMC, BLOCK_IO, FS, USB, NET, SCHED, KERNEL, OTHER
# Matching is case-insensitive and the first matching category wins, so the
# order of the arms below matters. The line is lower-cased once instead of
# toggling the shell-wide nocasematch option, which leaves the caller's
# shopt settings untouched.
tag_line() {
  local lowered="${1,,}"

  case "$lowered" in
    *under-voltage* | *"under voltage"* | *over-current* | *"over current"* \
      | *brownout* | *throttl*)
      echo "POWER"
      ;;
    *mmc* | *sdhci*)
      # "mmcblk" is already covered by *mmc*.
      echo "MMC"
      ;;
    *"i/o error"* | *blk_update_request*)
      # "buffer i/o error" is already covered by *"i/o error"*.
      echo "BLOCK_IO"
      ;;
    *"ext4-fs error"* | *xfs*error* | *btrfs*error* | *fat-fs* | *exfat*)
      echo "FS"
      ;;
    *"usb "*disconnect* | *"usb "*reset* | *xhci* | *dwc2* | *uas*)
      echo "USB"
      ;;
    *"link down"* | *"link up"* | *"netdev watchdog"*)
      echo "NET"
      ;;
    *watchdog* | *"soft lockup"* | *"hard lockup"* | *"hung task"*)
      echo "SCHED"
      ;;
    *"kernel panic"* | *oops* | *bug:*)
      echo "KERNEL"
      ;;
    *)
      echo "OTHER"
      ;;
  esac
}
# Per-tag rate limiter.
# Arguments: $1 - tag name
# Globals:   LAST_TAG_EPOCH (associative array, read/written),
#            PER_TAG_COOLDOWN_SEC (read)
# Returns:   0 when the tag may be logged now (and records the current time
#            for it); 1 while the tag is still inside its cooldown
should_log_tag() {
  local tag="$1"
  local now_e previous
  now_e="$(epoch)"
  previous="${LAST_TAG_EPOCH[$tag]:-0}"

  # Guard clause: still cooling down, leave the recorded time untouched.
  if (( now_e - previous < PER_TAG_COOLDOWN_SEC )); then
    return 1
  fi

  LAST_TAG_EPOCH[$tag]="$now_e"
  return 0
}
# Follow the kernel journal and log lines matching $KERNEL_PAT.
# Each matching line is tagged with tag_line; when should_log_tag allows it,
# the line is logged and a snapshot is requested.
# Globals:   KERNEL_PAT (read)
# Returns:   0; logs a warning when journalctl is missing or the stream ends
# The loop is the last stage of a pipeline and therefore runs in a subshell:
# state changed by should_log_tag/snapshot_devices stays inside that subshell.
follow_kernel() {
  have journalctl || { log "WARN: journalctl missing; kernel-follow disabled"; return 0; }

  local line tag
  # One long-lived grep filters the stream instead of forking a new process
  # per journal line; --line-buffered keeps matches flowing without delay.
  journalctl -k -f -o short-iso 2>/dev/null \
    | grep -E -i --line-buffered -- "$KERNEL_PAT" \
    | while IFS= read -r line; do
        tag="$(tag_line "$line")"
        if should_log_tag "$tag"; then
          log "KERNEL[$tag] $line"
          snapshot_devices "kernel:$tag"
        fi
      done || true

  # Reaching this point means the journal stream ended; record it so the gap
  # in kernel coverage is visible in the log.
  log "WARN: kernel journal stream ended; kernel-follow stopped"
}
# Poll the throttled flags forever and log every change of the bitmask.
# The last seen mask is persisted in $STATEFILE so that a restart does not
# re-report an unchanged value.
# Globals:   STATEFILE, INTERVAL_SEC (read)
# Returns:   never returns (infinite loop)
poll_throttled() {
  local last_mask="-1"
  local last_bad_raw=""
  local raw mask

  if [[ -r "$STATEFILE" ]]; then
    last_mask="$(cat "$STATEFILE" 2>/dev/null || echo -1)"
  fi

  while true; do
    raw="$(get_throttled_raw || true)"
    if [[ -n "$raw" ]]; then
      if ! mask="$(hex_to_int "$raw" 2>/dev/null)"; then
        # An unparsable reading must not end the monitor. Report each distinct
        # bad value once rather than on every poll.
        if [[ "$raw" != "$last_bad_raw" ]]; then
          log "WARN: unparsable throttled value: $raw"
          last_bad_raw="$raw"
        fi
      elif [[ "$mask" != "$last_mask" ]]; then
        log "THROTTLED raw=0x${raw#0x} $(describe_mask "$mask")"
        # Persisting state is best effort: a write failure is logged instead
        # of terminating the monitor. Write-then-rename keeps the state file
        # from ever being seen half-written.
        if ! {
          printf '%s\n' "$mask" >"${STATEFILE}.tmp" \
            && mv -f "${STATEFILE}.tmp" "$STATEFILE"
        } 2>/dev/null; then
          log "WARN: could not persist state to $STATEFILE"
        fi
        # Snapshot on any change (rare), not every poll.
        snapshot_devices "throttled_change"
        last_mask="$mask"
      fi
    fi
    sleep "$INTERVAL_SEC"
  done
}
# ---- main ----
mkdir -p "$LOGDIR"
touch "$LOGFILE"

# Single-instance guard: hold an exclusive lock on fd 9 for the lifetime of
# this process; a second instance exits quietly.
exec 9>"$LOCKFILE"
if ! flock -n 9; then
  exit 0
fi

declare -A LAST_TAG_EPOCH=()
LAST_SNAPSHOT_EPOCH=0

log "monitor start (interval=${INTERVAL_SEC}s, eth_if=${ETH_IF})"

# The follower runs in a background subshell, so its LAST_TAG_EPOCH and
# LAST_SNAPSHOT_EPOCH updates are not shared with poll_throttled.
# fd 9 is closed for it: otherwise a follower that outlives this process
# would keep the lock held and every later start would exit silently.
follow_kernel 9>&- &
FOLLOW_PID=$!

# Stop the follower's pipeline stages (its children) and then the follower
# itself, so no orphan keeps writing to the log after the monitor exits.
cleanup() {
  if have pkill; then
    pkill -P "$FOLLOW_PID" 2>/dev/null || true
  fi
  kill "$FOLLOW_PID" 2>/dev/null || true
}
trap cleanup EXIT
# Turn INT/TERM into a normal exit so the EXIT trap above always runs.
trap 'exit 130' INT
trap 'exit 143' TERM

poll_throttled
Add Comment
Please, Sign In to add comment