Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<#
.SYNOPSIS
    Merges two Netscape bookmark HTML files into a single file.
.DESCRIPTION
    - Folders with the same name are merged (names compared case-insensitively).
    - Duplicate bookmarks are removed, either BY URL or BY NAME+URL.
    - Merging and de-duplication are applied recursively inside all folders.
    - The internal "Root" container is not written to the output.
    - Intended for PowerShell 7+ on Linux, macOS and Windows.
.PARAMETER File1
    Path of the first bookmark HTML file.
.PARAMETER File2
    Path of the second bookmark HTML file.
.PARAMETER Output
    Path of the merged bookmark HTML file to write.
.PARAMETER DedupBy
    'Url'  → remove a bookmark if its URL was already seen
    'Both' → remove a bookmark only if URL AND name were already seen
#>
[CmdletBinding()]
param(
    [Parameter(Mandatory = $true)]
    [string] $File1,

    [Parameter(Mandatory = $true)]
    [string] $File2,

    [Parameter(Mandatory = $true)]
    [string] $Output,

    [ValidateSet('Url', 'Both')]
    [string] $DedupBy = 'Url'
)
# ----------------------------------------------------------------------
# Cross-platform HTML encoding: make sure [System.Net.WebUtility] resolves
# ----------------------------------------------------------------------
try {
    # Probe call only; the encoded result is discarded.
    [void][System.Net.WebUtility]::HtmlEncode('')
}
catch {
    # The probe failed, so try loading an additional assembly before the
    # functions below use the type.
    Add-Type -AssemblyName System.Net.Http
}
# ----------------------------------------------------------------------
# Parse HTML → object tree
# ----------------------------------------------------------------------
function ConvertFrom-BookmarkHtml {
    <#
    .SYNOPSIS
        Parses a Netscape bookmark HTML file into a tree of plain objects.
    .DESCRIPTION
        The file is read as UTF-8 and scanned line by line. A <DT><H3> line
        opens a folder, a </DL> line closes the current folder, and a <DT><A>
        line adds a bookmark to the current folder. Folder names, bookmark
        titles and URLs are HTML-decoded, so that encoding them again on
        output does not double-escape entities such as &amp;.
    .PARAMETER Path
        Literal path of the bookmark file (wildcards are not expanded).
    .OUTPUTS
        A 'Root' folder object (Type, Name, Children). Folder nodes carry
        Type/Name/Children; bookmark nodes carry Type/Name/Url.
        Writes an error and returns $null when the file does not exist.
    #>
    param([string]$Path)

    # -LiteralPath keeps names such as "bookmarks[1].html" from being
    # interpreted as wildcard patterns.
    if ([string]::IsNullOrWhiteSpace($Path) -or -not (Test-Path -LiteralPath $Path -PathType Leaf)) {
        Write-Error "Not found: $Path"
        return $null
    }

    # Get-Content -Raw yields $null for an empty file; the cast turns that into ''.
    $content = [string](Get-Content -LiteralPath $Path -Raw -Encoding UTF8)
    $content = $content -replace "`r`n?", "`n"

    $root  = [pscustomobject]@{ Type='Folder'; Name='Root'; Children=@() }
    $stack = [System.Collections.Generic.Stack[object]]::new()
    $stack.Push($root)

    # Tag matching is case-insensitive. Names/titles may be empty ([^<]*):
    # an unmatched folder header would otherwise let its </DL> close the
    # parent folder, and an untitled link would be silently dropped.
    $options       = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase
    $reFolderOpen  = [regex]::new('<DT><H3[^>]*>(?<name>[^<]*)</H3>', $options)
    $reFolderClose = [regex]::new('</DL>', $options)
    $reLink        = [regex]::new('<DT><A\s+[^>]*HREF="(?<url>[^"]+)"[^>]*>(?<title>[^<]*)</A>', $options)

    foreach ($rawLine in ($content -split "`n")) {
        $line = $rawLine.Trim()

        $folderMatch = $reFolderOpen.Match($line)
        if ($folderMatch.Success) {
            $folderName = [System.Net.WebUtility]::HtmlDecode($folderMatch.Groups['name'].Value).Trim()
            $folder = [pscustomobject]@{ Type='Folder'; Name=$folderName; Children=@() }
            $stack.Peek().Children += $folder
            $stack.Push($folder)
            continue
        }

        # Never pop the root: the outermost </DL> has no matching folder header.
        if ($stack.Count -gt 1 -and $reFolderClose.IsMatch($line)) {
            $null = $stack.Pop()
            continue
        }

        $linkMatch = $reLink.Match($line)
        if ($linkMatch.Success) {
            $title = [System.Net.WebUtility]::HtmlDecode($linkMatch.Groups['title'].Value).Trim()
            $url   = [System.Net.WebUtility]::HtmlDecode($linkMatch.Groups['url'].Value).Trim()
            $stack.Peek().Children += [pscustomobject]@{ Type='Bookmark'; Name=$title; Url=$url }
        }
    }

    return $root
}
# ----------------------------------------------------------------------
# Serialize tree → HTML (recursive; the caller supplies the outer <DL>)
# ----------------------------------------------------------------------
function ConvertTo-BookmarkHtml {
    <#
    .SYNOPSIS
        Renders a list of folder/bookmark nodes as Netscape bookmark HTML.
    .PARAMETER Nodes
        Nodes to render. Folder nodes are rendered with their Children
        recursively; every other non-null node is rendered as a link using
        its Name and Url. $null entries are skipped.
    .PARAMETER Indent
        Nesting level; each level indents the output by two spaces.
    .OUTPUTS
        String containing one line per tag, without the enclosing <DL> of
        the level being rendered.
    #>
    param([pscustomobject[]]$Nodes, [int]$Indent=0)

    $pad = ' ' * ($Indent * 2)
    $sb  = [System.Text.StringBuilder]::new()

    foreach ($node in $Nodes) {
        # A $null entry would otherwise fall into the bookmark branch and be
        # written as an empty <DT><A HREF=""></A> line.
        if ($null -eq $node) { continue }

        if ($node.Type -eq 'Folder') {
            $enc = [System.Net.WebUtility]::HtmlEncode($node.Name)
            [void]$sb.AppendLine("$pad<DT><H3>$enc</H3>")
            [void]$sb.AppendLine("$pad<DL><p>")
            [void]$sb.Append( (ConvertTo-BookmarkHtml -Nodes $node.Children -Indent ($Indent + 1)) )
            [void]$sb.AppendLine("$pad</DL><p>")
        }
        else {
            $n = [System.Net.WebUtility]::HtmlEncode($node.Name)
            $u = [System.Net.WebUtility]::HtmlEncode($node.Url)
            [void]$sb.AppendLine("$pad<DT><A HREF=`"$u`">$n</A>")
        }
    }

    return $sb.ToString()
}
# ----------------------------------------------------------------------
# De-duplicate one flat list of bookmarks (callers apply it per folder)
# ----------------------------------------------------------------------
function Remove-Duplicates {
    <#
    .SYNOPSIS
        Returns the bookmarks of a single list with duplicates removed.
    .DESCRIPTION
        The first occurrence of each key is kept and the input order is
        preserved. Keys are compared ordinally (case-sensitively), so two
        URLs that differ only in letter case are NOT treated as duplicates.
    .PARAMETER Bookmarks
        Bookmark objects exposing Url and Name. $null entries are skipped.
    .PARAMETER Mode
        'Url' keys on the URL alone; any other value keys on URL plus name.
    .OUTPUTS
        The unique bookmark objects (nothing when the input is empty).
    #>
    param([pscustomobject[]]$Bookmarks, [string]$Mode)

    # A PowerShell @{} literal compares string keys case-insensitively, which
    # would drop distinct case-sensitive URLs; use an ordinal set instead.
    $seen = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::Ordinal)

    $unique = foreach ($bm in $Bookmarks) {
        if ($null -eq $bm) { continue }

        $key = if ($Mode -eq 'Url') {
            [string]$bm.Url
        }
        else {
            # Newline separator: it cannot occur inside a value parsed from a
            # single line, so URL/name pairs can never collide.
            "$($bm.Url)`n$($bm.Name)"
        }

        # HashSet.Add returns $false when the key is already present.
        if ($seen.Add($key)) { $bm }
    }

    return $unique
}
# ----------------------------------------------------------------------
# Merge two trees (with recursive deduplication)
# ----------------------------------------------------------------------
function Merge-Trees {
    <#
    .SYNOPSIS
        Merges the children of two folder nodes into a new 'Root' folder.
    .DESCRIPTION
        Children of Tree1 are processed before those of Tree2.
        - Bookmarks of this level are collected and passed through
          Remove-Duplicates.
        - Folders whose names match case-insensitively are combined into one
          folder (the first spelling seen is kept); this also applies to
          same-name folders inside a single input tree. Each combined folder
          is then merged recursively.
        The result lists the de-duplicated bookmarks first, then the folders
        in order of first appearance. The input trees are not modified.
    .PARAMETER Tree1
        First folder node (only its Children property is read).
    .PARAMETER Tree2
        Second folder node (only its Children property is read).
    .PARAMETER DedupBy
        De-duplication mode forwarded to Remove-Duplicates.
    .OUTPUTS
        Folder object (Type='Folder', Name='Root') whose Children is always
        an array without $null entries.
    #>
    param(
        [pscustomobject]$Tree1,
        [pscustomobject]$Tree2,
        [string]$DedupBy
    )

    $bookmarks   = [System.Collections.Generic.List[object]]::new()
    $folderOrder = [System.Collections.Generic.List[object]]::new()
    $folderByKey = [System.Collections.Generic.Dictionary[string,object]]::new([System.StringComparer]::Ordinal)

    # Single pass over both child lists: bookmarks are collected, folders are
    # grouped by lower-cased name with their children queued for the recursive merge.
    foreach ($node in @($Tree1.Children) + @($Tree2.Children)) {
        if ($null -eq $node) { continue }

        if ($node.Type -eq 'Folder') {
            $key = ([string]$node.Name).ToLowerInvariant()
            if (-not $folderByKey.ContainsKey($key)) {
                $group = [pscustomobject]@{
                    Name    = $node.Name
                    Pending = [System.Collections.Generic.List[object]]::new()
                }
                $folderByKey[$key] = $group
                $folderOrder.Add($group)
            }
            $pending = $folderByKey[$key].Pending
            foreach ($child in @($node.Children)) {
                if ($null -ne $child) { $pending.Add($child) }
            }
        }
        elseif ($node.Type -eq 'Bookmark') {
            $bookmarks.Add($node)
        }
    }

    # @(...) turns "no output" into an empty array. Plain `@() + <nothing>`
    # would append a $null element that is later written as an empty link.
    $uniqueBookmarks = @()
    if ($bookmarks.Count -gt 0) {
        $bookmarkArray   = $bookmarks.ToArray()
        $uniqueBookmarks = @(Remove-Duplicates $bookmarkArray $DedupBy)
    }

    $mergedFolders = @(
        foreach ($group in $folderOrder) {
            $queued = [pscustomobject]@{ Children = $group.Pending.ToArray() }
            $empty  = [pscustomobject]@{ Children = @() }
            $sub    = Merge-Trees -Tree1 $queued -Tree2 $empty -DedupBy $DedupBy
            [pscustomobject]@{ Type = 'Folder'; Name = $group.Name; Children = $sub.Children }
        }
    )

    return [pscustomobject]@{
        Type     = 'Folder'
        Name     = 'Root'
        Children = @($uniqueBookmarks) + @($mergedFolders)
    }
}
# ----------------------------------------------------------------------
# Main: parse both files, merge them, write the result
# ----------------------------------------------------------------------
try {
    $t1 = ConvertFrom-BookmarkHtml $File1
    $t2 = ConvertFrom-BookmarkHtml $File2
    # The parser has already reported which file is missing.
    if (-not $t1 -or -not $t2) { exit 1 }

    # Merge + deduplicate recursively
    $merged = Merge-Trees -Tree1 $t1 -Tree2 $t2 -DedupBy $DedupBy

    # The Root container itself is not written; only its children are
    # rendered inside the outer <DL>.
    $nl = [Environment]::NewLine
    $header = @(
        '<!DOCTYPE NETSCAPE-Bookmark-file-1>'
        '<!-- Merged by PowerShell -->'
        '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">'
        '<TITLE>Bookmarks</TITLE>'
        '<H1>Bookmarks</H1>'
        '<DL><p>'
    ) -join $nl
    $body   = ConvertTo-BookmarkHtml $merged.Children
    $footer = "</DL><p>"

    # Explicit line break after the header so the first <DT> entry starts on
    # its own line instead of being glued to the opening <DL><p>.
    $final = $header + $nl + $body + $footer

    $outDir = Split-Path -Path $Output -Parent
    if ($outDir -and -not (Test-Path -LiteralPath $outDir)) {
        New-Item -ItemType Directory -Path $outDir -Force -ErrorAction Stop | Out-Null
    }

    # -ErrorAction Stop turns a failed write into a terminating error, so the
    # catch block runs instead of the success message being printed.
    # -LiteralPath keeps characters such as [ ] in the file name literal.
    Set-Content -LiteralPath $Output -Value $final -Encoding UTF8 -ErrorAction Stop

    Write-Host "Merged & deduplicated → $Output" -ForegroundColor Green
}
catch {
    Write-Error "Error: $($_.Exception.Message)"
    exit 1
}
Advertisement
Add Comment
Please, Sign In to add comment