#
# cistis.ps1 - Removes timestamp data from Office documents while keeping author names.
#
# Supported inputs: .docx, .docm, .xlsx, .xlsm (Open Packaging Convention ZIP archives).
#
# Parameters:
#   -InputFile  Path to the document. A relative path is tried against the script
#               directory first and then against the current PowerShell location.
#   -BackupDir  Optional directory for the timestamped backup copy. When omitted,
#               a "zaloha" folder next to the input file is used.
#   -Test       Dry run: the document and the backup location are left untouched;
#               only an extracted temporary copy is analysed (and then discarded).
#
# Exit code: 0 on success, 1 when the file is missing, unsupported, or processing fails.
#
param(
    [Parameter(Mandatory=$true, Position=0)]
    [string]$InputFile,

    [Parameter(Mandatory=$false)]
    [string]$BackupDir = "",

    [Parameter(Mandatory=$false)]
    [switch]$Test
)

# Load the ZIP support before any System.IO.Compression type is referenced.
Add-Type -AssemblyName System.IO.Compression -ErrorAction SilentlyContinue | Out-Null
Add-Type -AssemblyName System.IO.Compression.FileSystem -ErrorAction SilentlyContinue | Out-Null

# Configuration
$CleanExcelRevisions = $true
if ($Test) {
    $VerbosePreference = "Continue"
} else {
    $VerbosePreference = "SilentlyContinue"
}
$DefaultBackupFolder = "zaloha"
$CompressionLevel = [System.IO.Compression.CompressionLevel]::Optimal
$XmlEncoding = [System.Text.Encoding]::UTF8
$RegexOpts = [System.Text.RegularExpressions.RegexOptions]'IgnoreCase, Singleline'

# Regex fragment matching one double- or single-quoted attribute value.
$QuotedValue = '(?:"[^"]*"|''[^'']*'')'

# Attribute patterns for Word parts (body, revisions, comments).
# Every pattern is anchored on the whitespace preceding the attribute and on the
# complete attribute name, so an attribute that merely ends in "date"
# (e.g. w:fullDate) is never cut in half, and author attributes are never touched.
$WordAttrPatterns = @(
    ('\s+(?:[\w.-]+:)?(?:date|dateUtc)\s*=\s*' + $QuotedValue),
    ('\s+(?:w:)?(?:created|modified)\s*=\s*' + $QuotedValue),
    ('\s+lastModifiedTime\s*=\s*' + $QuotedValue)
)

# Attribute patterns for Excel parts (comments, threaded comments, revisions).
$ExcelAttrPatterns = @(
    "\s+(\w+:)?(date|dateUtc|created|creationDate|modified|modifiedTime|createdTime|time|dttm|lastModifiedTime)\s*=\s*("".*?""|'.*?')",
    "\s+dt\s*=\s*("".*?""|'.*?')",
    "\s+uid\s*=\s*(""[^""]*\d{4}-\d{2}-\d{2}[^""]*""|'[^']*\d{4}-\d{2}-\d{2}[^']*')"
)

# Element patterns for docProps/core.xml. Whole elements are removed.
# NOTE(review): the previous list held six entries whose XML tag names had been
# stripped (leaving fragments such as "]*>.*?" that would delete every ">" in the
# file). Only these three timestamp elements could be reconstructed with
# confidence - confirm whether further elements were intended.
$MetadataPatterns = @(
    '<dcterms:created\b[^>]*>.*?</dcterms:created>',
    '<dcterms:modified\b[^>]*>.*?</dcterms:modified>',
    '<cp:lastPrinted\b[^>]*>.*?</cp:lastPrinted>'
)

$SupportedExtensions = @(".docx", ".docm", ".xlsx", ".xlsm")

if ($Test) {
    Write-Host "=== DEBUG START ===" -ForegroundColor Magenta
    Write-Host "Input file: $InputFile" -ForegroundColor Yellow
    Write-Host "Test mode: $Test" -ForegroundColor Yellow
    Write-Host "Verbose: $($VerbosePreference)" -ForegroundColor Yellow
}

# Reads a UTF-8 encoded text file and returns its content as one string.
function Get-Utf8Text {
    param([string]$FilePath)
    return [System.IO.File]::ReadAllText($FilePath, $XmlEncoding)
}

# Writes a text file as UTF-8 without a byte-order mark.
function Set-Utf8Text {
    param([string]$FilePath, [string]$Content)
    [System.IO.File]::WriteAllText($FilePath, $Content, (New-Object System.Text.UTF8Encoding($false)))
}

# Applies each pattern in turn to $Text and deletes every match.
# Returns a hashtable:
#   Text  - the text after all removals
#   Count - total number of removed matches
#   Hits  - one entry per pattern that matched: @{ Pattern; Values }
function Remove-PatternMatches {
    param([string]$Text, [string[]]$Patterns)

    $hits = @()
    $count = 0
    foreach ($pat in $Patterns) {
        # Do not name this variable $matches - that is a PowerShell automatic variable.
        $found = [System.Text.RegularExpressions.Regex]::Matches($Text, $pat, $RegexOpts)
        if ($found.Count -eq 0) { continue }

        $hits += @{ Pattern = $pat; Values = @($found | ForEach-Object { $_.Value }) }
        $count += $found.Count
        $Text = [System.Text.RegularExpressions.Regex]::Replace($Text, $pat, "", $RegexOpts)
    }
    return @{ Text = $Text; Count = $count; Hits = $hits }
}

# Creates a timestamped backup copy of the input file and returns the backup path.
# In test mode nothing is created or copied; only the would-be path is reported.
# Throws when the copy fails so that the caller never modifies an unsaved original.
function New-VersionedBackup {
    param([string]$FullPath, [string]$BackupDirectory = "")

    $base = [System.IO.Path]::GetFileNameWithoutExtension($FullPath)
    $ext = [System.IO.Path]::GetExtension($FullPath)

    if ($BackupDirectory) {
        # An explicitly requested directory is honoured (and created when missing)
        # instead of silently falling back to the default folder.
        $bdir = $BackupDirectory
    } else {
        $bdir = Join-Path ([System.IO.Path]::GetDirectoryName($FullPath)) $DefaultBackupFolder
    }

    $stamp = (Get-Date).ToString("yyyyMMdd_HHmmss")
    $bfile = Join-Path $bdir ("{0}_{1}{2}" -f $base, $stamp, $ext)

    if (-not $Test) {
        if (-not (Test-Path -LiteralPath $bdir -PathType Container)) {
            New-Item -ItemType Directory -Path $bdir -Force -ErrorAction Stop | Out-Null
        }
        Copy-Item -LiteralPath $FullPath -Destination $bfile -Force -ErrorAction Stop
    }
    Write-Host "Vytvarim zalohu: $bfile" -ForegroundColor Green
    return $bfile
}

# Removes all attributes matching $Patterns from one extracted XML part.
# The extracted temporary copy is rewritten even in test mode (it is discarded
# afterwards); this keeps later passes from counting the same attributes again.
# Returns @{ RemovedCount = <number of removed attributes> }.
function Process-XmlFile {
    param(
        [System.IO.FileInfo]$XmlFile,
        [string[]]$Patterns,
        [string]$FilePrefix,
        [string]$UnzippedPath
    )

    $outcome = Remove-PatternMatches -Text (Get-Utf8Text $XmlFile.FullName) -Patterns $Patterns

    foreach ($hit in $outcome.Hits) {
        Write-Verbose (" Nalezeno {0} vyskytov vzorem: {1}" -f $hit.Values.Count, $hit.Pattern)
        foreach ($value in $hit.Values) {
            Write-Verbose (" Odstranuji: {0}" -f $value.Trim())
        }
    }

    if ($outcome.Count -gt 0) {
        Set-Utf8Text $XmlFile.FullName $outcome.Text
        $relativePath = $XmlFile.FullName.Replace($UnzippedPath, '')
        Write-Verbose ("{0} {1} removed={2}" -f $FilePrefix, $relativePath, $outcome.Count)
    }

    return @{ RemovedCount = $outcome.Count }
}

# Removes date attributes from comment-related parts.
# For "Word": every *comment*.xml under /word plus document.xml; otherwise every
# *comment*.xml under /xl. Returns @{ Changed = <files>; Removed = <attributes> }.
function Process-CommentFiles {
    param([string]$UnzippedPath, [string]$FileType)

    if ($Test) {
        Write-Host "DEBUG: Zpracovavam komentare pro $FileType" -ForegroundColor Cyan
    }

    $changed = 0
    $totalRemoved = 0
    $candidates = @()

    if ($FileType -eq "Word") {
        $wordFolder = Join-Path $UnzippedPath "word"
        if ($Test) {
            Write-Host "DEBUG: Hledam komentare ve slozce: $wordFolder" -ForegroundColor Cyan
        }

        # Main comments part
        $mainComments = Join-Path $wordFolder "comments.xml"
        if (Test-Path -LiteralPath $mainComments) {
            $candidates += Get-Item -LiteralPath $mainComments
            if ($Test) {
                Write-Host "DEBUG: Nalezen hlavni soubor komentaru: comments.xml" -ForegroundColor Green
            }
        }

        # Further comment parts (commentsExtended.xml, commentsIds.xml, ...)
        $additionalComments = @(Get-ChildItem -Path $wordFolder -Filter "*comment*.xml" -Recurse -File -ErrorAction SilentlyContinue)
        if ($additionalComments.Count -gt 0) {
            $candidates += $additionalComments
            if ($Test) {
                Write-Host "DEBUG: Nalezeno $($additionalComments.Count) dalsich souboru s komentari" -ForegroundColor Green
            }
        }

        # document.xml may carry comment-related attributes as well
        $docXml = Join-Path $wordFolder "document.xml"
        if (Test-Path -LiteralPath $docXml) {
            $candidates += Get-Item -LiteralPath $docXml
            if ($Test) {
                Write-Host "DEBUG: Pridavam document.xml pro kontrolu komentaru" -ForegroundColor Green
            }
        }
    } else {
        # Excel comments
        $xl = Join-Path $UnzippedPath "xl"
        $candidates = @(Get-ChildItem -Path $xl -Filter "*comment*.xml" -Recurse -File -ErrorAction SilentlyContinue)
    }

    # comments.xml is found both directly and by the wildcard search; process each file once.
    $seen = @{}
    $commentFiles = @()
    foreach ($candidate in $candidates) {
        if ($candidate -and -not $seen.ContainsKey($candidate.FullName)) {
            $seen[$candidate.FullName] = $true
            $commentFiles += $candidate
        }
    }

    if ($Test) {
        Write-Host "DEBUG: Celkem souboru ke kontrole: $($commentFiles.Count)" -ForegroundColor Cyan
    }

    # Only whole "date" attributes (with any namespace prefix) are removed; authors stay.
    $commentPatterns = @(
        ('\s+(?:[\w.-]+:)?date\s*=\s*' + $QuotedValue)
    )

    foreach ($commentFile in $commentFiles) {
        if ($Test) {
            Write-Host "DEBUG: Zpracovavam komentare v: $($commentFile.Name)" -ForegroundColor Yellow
        }

        $txt = Get-Utf8Text $commentFile.FullName

        # Show the beginning of the part for debugging (test mode only)
        if ($Test) {
            Write-Host "DEBUG: Prvnich 200 znaku souboru:" -ForegroundColor Gray
            Write-Host ($txt.Substring(0, [Math]::Min(200, $txt.Length))) -ForegroundColor Gray
        }

        $outcome = Remove-PatternMatches -Text $txt -Patterns $commentPatterns

        if ($Test) {
            foreach ($hit in $outcome.Hits) {
                Write-Host " DEBUG: Nalezeno $($hit.Values.Count) casovych udaju vzorem: $($hit.Pattern)" -ForegroundColor Green
                foreach ($value in $hit.Values) {
                    Write-Host " DEBUG: Odstranuji: $value" -ForegroundColor Red
                }
            }
        }

        if ($outcome.Count -gt 0) {
            # Only the extracted temporary copy is rewritten, so this is safe in test mode too.
            Set-Utf8Text $commentFile.FullName $outcome.Text
            $changed++
            $totalRemoved += $outcome.Count
            if ($Test) {
                Write-Host "[C] $($commentFile.Name) removed=$($outcome.Count)" -ForegroundColor Green
            }
        } elseif ($Test) {
            Write-Host "DEBUG: V souboru $($commentFile.Name) nebyly nalezeny casovych udaje" -ForegroundColor Gray
        }
    }

    return @{ Changed = $changed; Removed = $totalRemoved }
}

# Removes timestamp elements from docProps/core.xml of the extracted package.
# Returns @{ Changed = <0 or 1>; Removed = <number of removed elements> }.
function Process-DocumentMetadata {
    param([string]$UnzippedPath)

    $changed = 0
    $totalRemoved = 0

    $coreXml = Join-Path (Join-Path $UnzippedPath "docProps") "core.xml"
    if (Test-Path -LiteralPath $coreXml) {
        $outcome = Remove-PatternMatches -Text (Get-Utf8Text $coreXml) -Patterns $MetadataPatterns
        if ($outcome.Count -gt 0) {
            Set-Utf8Text $coreXml $outcome.Text
            $changed++
            $totalRemoved += $outcome.Count
            Write-Verbose "[M] core.xml removed=$totalRemoved"
        }
    }

    return @{ Changed = $changed; Removed = $totalRemoved }
}

# Processes every XML part below /word of an extracted Word package.
# Throws when the /word folder is missing.
# Returns @{ Changed = <files>; Removed = <attributes> }.
function Process-WordDocument {
    param([string]$UnzippedPath)

    $wordFolder = Join-Path $UnzippedPath "word"
    if (-not (Test-Path -LiteralPath $wordFolder)) {
        throw "Neplatna DOCX struktura (chybi /word slozka)"
    }

    $targets = @(Get-ChildItem -Path $wordFolder -Filter *.xml -Recurse -File -ErrorAction SilentlyContinue)
    $changed = 0
    $totalRemoved = 0

    if ($Test) {
        Write-Host "DEBUG: Nalezeno $($targets.Count) XML souboru ve Word slozce" -ForegroundColor Cyan
    }

    foreach ($xmlFile in $targets) {
        if ($Test) {
            Write-Host "DEBUG: Zpracovavam: $($xmlFile.Name)" -ForegroundColor Gray
        }
        $result = Process-XmlFile -XmlFile $xmlFile -Patterns $WordAttrPatterns -FilePrefix "[W]" -UnzippedPath $UnzippedPath
        if ($result.RemovedCount -gt 0) {
            $changed++
            $totalRemoved += $result.RemovedCount
        }
    }

    return @{ Changed = $changed; Removed = $totalRemoved }
}

# Processes legacy comments, threaded comments and (when $CleanExcelRevisions is
# set) the revision log of an extracted Excel package.
# Throws when the /xl folder is missing.
# Returns @{ Changed = <files>; Removed = <attributes> }.
function Process-ExcelWorkbook {
    param([string]$UnzippedPath)

    $xl = Join-Path $UnzippedPath "xl"
    if (-not (Test-Path -LiteralPath $xl)) {
        throw "Neplatna XLSX struktura (chybi /xl slozka)"
    }

    $changed = 0
    $totalRemoved = 0

    # Each work item pairs a file with the prefix used in the verbose log.
    $work = @()
    foreach ($file in @(Get-ChildItem -Path $xl -Filter "comments*.xml" -Recurse -File -ErrorAction SilentlyContinue)) {
        $work += @{ File = $file; Prefix = "[X]" }
    }
    foreach ($file in @(Get-ChildItem -Path $xl -Filter "threadedComments*.xml" -Recurse -File -ErrorAction SilentlyContinue)) {
        $work += @{ File = $file; Prefix = "[X]" }
    }

    # Optional: legacy revision log
    if ($CleanExcelRevisions) {
        $revDir = Join-Path $xl "revisions"
        if (Test-Path -LiteralPath $revDir) {
            foreach ($file in @(Get-ChildItem -Path $revDir -Filter "*.xml" -File -ErrorAction SilentlyContinue)) {
                $work += @{ File = $file; Prefix = "[XR]" }
            }
        }
    }

    foreach ($item in $work) {
        $result = Process-XmlFile -XmlFile $item.File -Patterns $ExcelAttrPatterns -FilePrefix $item.Prefix -UnzippedPath $UnzippedPath
        if ($result.RemovedCount -gt 0) {
            $changed++
            $totalRemoved += $result.RemovedCount
        }
    }

    return @{ Changed = $changed; Removed = $totalRemoved }
}

# --- Locate the input file -------------------------------------------------

$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
if ([System.IO.Path]::IsPathRooted($InputFile)) {
    # A rooted path must not be glued onto the script directory.
    $tryPath = $InputFile
} else {
    $tryPath = Join-Path $scriptDir $InputFile
}

if ($Test) {
    Write-Host "DEBUG: Script dir: $scriptDir" -ForegroundColor Yellow
    Write-Host "DEBUG: Try path: $tryPath" -ForegroundColor Yellow
}

# Resolve-Path is used instead of [IO.Path]::GetFullPath because the latter
# resolves against the process working directory, not the PowerShell location.
if (Test-Path -LiteralPath $tryPath -PathType Leaf) {
    $FullPath = (Resolve-Path -LiteralPath $tryPath).ProviderPath
    if ($Test) {
        Write-Host "DEBUG: Soubor nalezen v try path: $FullPath" -ForegroundColor Green
    }
} elseif (Test-Path -LiteralPath $InputFile -PathType Leaf) {
    $FullPath = (Resolve-Path -LiteralPath $InputFile).ProviderPath
    if ($Test) {
        Write-Host "DEBUG: Soubor nalezen v input path: $FullPath" -ForegroundColor Green
    }
} else {
    Write-Host "CHYBA: Soubor nebyl nalezen: $InputFile" -ForegroundColor Red
    if ($Test) {
        Write-Host "DEBUG: Zkusil jsem: $tryPath" -ForegroundColor Red
        Write-Host "DEBUG: A take: $InputFile" -ForegroundColor Red
    }
    exit 1
}

$Extension = [System.IO.Path]::GetExtension($FullPath).ToLowerInvariant()
if ($Test) {
    Write-Host "DEBUG: Extension: $Extension" -ForegroundColor Yellow
}

if ($Extension -notin $SupportedExtensions) {
    Write-Host "CHYBA: Podporovane jsou pouze: $($SupportedExtensions -join ', ')" -ForegroundColor Red
    exit 1
}

if ($Test -or $VerbosePreference -eq "Continue") {
    Write-Host "POZOR: Modifikace znevazni digitalni podpisy (pokud existuji)" -ForegroundColor Yellow
}

if ($Test) {
    Write-Host "TEST REZIM: Zadne zmeny nebudou provedeny" -ForegroundColor Cyan
}

# --- Backup, extract, clean, repack ----------------------------------------

$guid = [Guid]::NewGuid().ToString("N")
$workRoot = Join-Path $env:TEMP ("CleanTimestamps_" + $guid)
$unzipped = Join-Path $workRoot "unzipped"

if ($Test) {
    Write-Host "DEBUG: Work root: $workRoot" -ForegroundColor Yellow
    Write-Host "DEBUG: Unzipped path: $unzipped" -ForegroundColor Yellow
}

$results = @{ Changed = 0; Removed = 0 }
$metadataResults = @{ Changed = 0; Removed = 0 }
$commentResults = @{ Changed = 0; Removed = 0 }
$failed = $false

try {
    # The result is captured so that the returned path is not written to the output stream.
    $backupFile = New-VersionedBackup -FullPath $FullPath -BackupDirectory $BackupDir

    # The package is extracted in test mode as well (its content has to be inspected).
    New-Item -ItemType Directory -Path $workRoot, $unzipped -Force -ErrorAction Stop | Out-Null
    if ($Test) { Write-Host "DEBUG: Rozbaluji soubor..." -ForegroundColor Yellow }
    [System.IO.Compression.ZipFile]::ExtractToDirectory($FullPath, $unzipped)
    if ($Test) { Write-Host "DEBUG: Soubor rozbalen" -ForegroundColor Green }

    $isWord = $Extension -in @(".docx", ".docm")

    if (-not $Test) {
        $fileName = [System.IO.Path]::GetFileName($FullPath)
        Write-Host "Zpracovavam dokument: $fileName" -ForegroundColor Yellow
        if (-not $isWord -and $CleanExcelRevisions) {
            Write-Host "Vcetne revizi (historie bude ztracena)" -ForegroundColor Magenta
        }
    }

    if ($isWord) {
        # Generic pass over all Word XML parts
        if ($Test) { Write-Host "DEBUG: Spoustim standardni zpracovani Word dokumentu" -ForegroundColor Cyan }
        $results = Process-WordDocument -UnzippedPath $unzipped

        # Comment-specific pass
        if ($Test) { Write-Host "DEBUG: Spoustim specificke zpracovani komentaru" -ForegroundColor Cyan }
        $commentResults = Process-CommentFiles -UnzippedPath $unzipped -FileType "Word"

        # Document metadata
        if ($Test) { Write-Host "DEBUG: Spoustim zpracovani metadat" -ForegroundColor Cyan }
        $metadataResults = Process-DocumentMetadata -UnzippedPath $unzipped
    } elseif ($Extension -in @(".xlsx", ".xlsm")) {
        $results = Process-ExcelWorkbook -UnzippedPath $unzipped
        $metadataResults = Process-DocumentMetadata -UnzippedPath $unzipped
    }

    # Repack (never in test mode). The new archive is built completely before the
    # original is overwritten, so a packing failure cannot destroy the document.
    if (-not $Test) {
        $tmpZip = Join-Path $workRoot "packed.zip"
        [System.IO.Compression.ZipFile]::CreateFromDirectory($unzipped, $tmpZip, $CompressionLevel, $false)
        Copy-Item -LiteralPath $tmpZip -Destination $FullPath -Force -ErrorAction Stop
    }
} catch {
    Write-Host "Vysledek: CHYBA - $($_.Exception.Message)" -ForegroundColor Red
    $failed = $true
} finally {
    # Always remove the temporary working directory, also after a failure.
    if (Test-Path -LiteralPath $workRoot) {
        Remove-Item -LiteralPath $workRoot -Recurse -Force -ErrorAction SilentlyContinue
    }
}

if ($failed) {
    exit 1
}

# --- Final report ----------------------------------------------------------

$totalChanged = $results.Changed + $metadataResults.Changed + $commentResults.Changed
$totalRemoved = $results.Removed + $metadataResults.Removed + $commentResults.Removed

Write-Host ""
if ($Test) {
    Write-Host "=== VYSLEDKY ===" -ForegroundColor Magenta
    Write-Host ""
    Write-Host "TEST DOKONCEN: $FullPath" -ForegroundColor Cyan
    Write-Host "Pro skutecne provedeni spustte bez parametru -Test" -ForegroundColor Yellow

    # Show what a real run would do
    if ($totalRemoved -gt 0) {
        Write-Host ""
        Write-Host "TEST - Co by bylo odstraneno:" -ForegroundColor Yellow
        Write-Host " Zmenene XML soubory: $totalChanged" -ForegroundColor White
        Write-Host " Odstranenych casovych atributu: $totalRemoved" -ForegroundColor White
        Write-Host " - z obsahu: $($results.Removed)" -ForegroundColor Gray
        Write-Host " - z komentaru: $($commentResults.Removed)" -ForegroundColor Gray
        Write-Host " - z metadat: $($metadataResults.Removed)" -ForegroundColor Gray
    } else {
        Write-Host ""
        Write-Host "TEST - Zadne casove atributy nebyly nalezeny" -ForegroundColor Gray
    }

    Write-Host "=== DEBUG END ===" -ForegroundColor Magenta
} else {
    Write-Host "=== VYSLEDKY ===" -ForegroundColor Green
    Write-Host "Zmenene XML soubory: $totalChanged" -ForegroundColor White
    Write-Host "Odstranenych casovych atributu: $totalRemoved" -ForegroundColor White
    Write-Host " - z obsahu: $($results.Removed)" -ForegroundColor Gray
    Write-Host " - z komentaru: $($commentResults.Removed)" -ForegroundColor Gray
    Write-Host " - z metadat: $($metadataResults.Removed)" -ForegroundColor Gray

    if ($totalRemoved -eq 0) {
        Write-Host "Vysledek: OK (zadne casove atributy nenalezeny)" -ForegroundColor Gray
    } else {
        Write-Host "Vysledek: OK" -ForegroundColor Green
    }
}