#
# cistis.ps1 - Removes timestamps from Office documents (.docx, .docm, .xlsx, .xlsm) while keeping author names
#
param(
# Path of the Office document to clean (mandatory, first positional argument).
[Parameter(Mandatory=$true, Position=0)]
[string]$InputFile,
# Optional directory for the backup copy; empty string by default.
[Parameter(Mandatory=$false)]
[string]$BackupDir = "",
# Test mode: enables debug/verbose output and suppresses writes to the document.
[Parameter(Mandatory=$false)]
[switch]$Test
)
# Configuration
$CleanExcelRevisions = $true

# Test mode doubles as verbose mode so that every removed attribute is listed.
if ($Test) {
    $VerbosePreference = "Continue"
} else {
    $VerbosePreference = "SilentlyContinue"
}

$DefaultBackupFolder = "zaloha"
$CompressionLevel = [System.IO.Compression.CompressionLevel]::Optimal
$XmlEncoding = [System.Text.Encoding]::UTF8

# Regex patterns for Word documents (including revisions and comments).
# Every pattern starts with \s+ so that it can only match a COMPLETE attribute
# (leading whitespace + optional namespace prefix + name + value). Patterns
# without that anchor would also match the tail of longer attribute names
# (e.g. the "Date=..." part of w:fullDate="...") and leave a dangling prefix
# behind, which makes the XML part unreadable. No pattern touches author
# attributes - authors must be preserved.
$WordAttrPatterns = @(
    "\s+w:date\s*=\s*("".*?""|'.*?')",
    "\s+w14:date\s*=\s*("".*?""|'.*?')",
    "\s+w15:dateUtc\s*=\s*("".*?""|'.*?')",
    "\s+mso:date\s*=\s*("".*?""|'.*?')",
    "\s+w:modified\s*=\s*("".*?""|'.*?')",
    "\s+w:created\s*=\s*("".*?""|'.*?')",
    "\s+created\s*=\s*("".*?""|'.*?')",
    "\s+modified\s*=\s*("".*?""|'.*?')",
    "\s+lastModifiedTime\s*=\s*("".*?""|'.*?')",
    # Catch-all for any remaining date attribute, with or without a prefix.
    "\s+(\w+:)?date\s*=\s*("".*?""|'.*?')"
)

# Regex patterns for Excel documents
$ExcelAttrPatterns = @(
    "\s+(\w+:)?(date|dateUtc|created|creationDate|modified|modifiedTime|createdTime|time|dttm|lastModifiedTime)\s*=\s*("".*?""|'.*?')",
    "\s+dt\s*=\s*("".*?""|'.*?')",
    "\s+uid\s*=\s*(""[^""]*\d{4}-\d{2}-\d{2}[^""]*""|'[^']*\d{4}-\d{2}-\d{2}[^']*')"
)

# Patterns for document metadata (docProps/core.xml).
# Each pattern removes one whole timestamp element including its content.
# Author elements (dc:creator, cp:lastModifiedBy) are intentionally not listed.
$MetadataPatterns = @(
    "<dcterms:created[^>]*>.*?</dcterms:created>",
    "<dcterms:modified[^>]*>.*?</dcterms:modified>",
    "<cp:lastPrinted[^>]*>.*?</cp:lastPrinted>"
)

$SupportedExtensions = @(".docx",".docm",".xlsx",".xlsm")

if ($Test) {
    Write-Host "=== DEBUG START ===" -ForegroundColor Magenta
    Write-Host "Input file: $InputFile" -ForegroundColor Yellow
    Write-Host "Test mode: $Test" -ForegroundColor Yellow
    Write-Host "Verbose: $($VerbosePreference)" -ForegroundColor Yellow
}
# Nacita UTF-8 kodovany textovy soubor
function Get-Utf8Text {
    # Returns the whole content of the file at $FilePath as a single string,
    # decoded with the script-level $XmlEncoding.
    param([string]$FilePath)

    $content = [System.IO.File]::ReadAllText($FilePath, $XmlEncoding)
    return $content
}
# Zapise UTF-8 kodovany textovy soubor bez BOM
function Set-Utf8Text {
    # Writes $Content to the file at $FilePath as UTF-8.
    param([string]$FilePath, [string]$Content)

    # Constructor argument $false = do not emit a byte-order mark.
    $utf8NoBom = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false
    [System.IO.File]::WriteAllText($FilePath, $Content, $utf8NoBom)
}
# Vytvori verzovanou zalohu vstupniho souboru
function New-VersionedBackup {
    # Creates a timestamped copy of $FullPath and returns the backup file path.
    #
    # Parameters:
    #   FullPath        - file to back up.
    #   BackupDirectory - target directory; used only when it already exists,
    #                     otherwise the "$DefaultBackupFolder" sub-folder next
    #                     to the input file is used (and created on demand).
    #
    # In test mode ($Test) nothing is written: neither the backup folder nor
    # the copy is created, only the path that would be used is reported.
    # A failed copy is a terminating error so that the caller never goes on to
    # modify the original without a backup.
    param([string]$FullPath, [string]$BackupDirectory = "")

    $dir = Split-Path $FullPath -Parent
    $name = Split-Path $FullPath -Leaf
    $base = [System.IO.Path]::GetFileNameWithoutExtension($name)
    $ext = [System.IO.Path]::GetExtension($name)

    if ($BackupDirectory -and (Test-Path -LiteralPath $BackupDirectory -PathType Container)) {
        $bdir = $BackupDirectory
    } else {
        $bdir = Join-Path $dir $DefaultBackupFolder
    }

    $stamp = (Get-Date).ToString("yyyyMMdd_HHmmss")
    $bfile = Join-Path $bdir ("{0}_{1}{2}" -f $base,$stamp,$ext)

    if (-not $Test) {
        if (-not (Test-Path -LiteralPath $bdir)) {
            New-Item -ItemType Directory -Path $bdir -ErrorAction Stop | Out-Null
        }
        Copy-Item -LiteralPath $FullPath -Destination $bfile -Force -ErrorAction Stop
    }

    Write-Host "Vytvarim zalohu: $bfile" -ForegroundColor Green
    return $bfile
}
# Zpracuje jednotlivy XML soubor a odstrani casove atributy
function Process-XmlFile {
    # Applies every regex in $Patterns to one XML file, deleting each match.
    #
    # Parameters:
    #   XmlFile      - file to process.
    #   Patterns     - regular expressions; matched case-insensitively with
    #                  Singleline so '.' also spans line breaks.
    #   FilePrefix   - tag used in the verbose log line (e.g. "[W]").
    #   UnzippedPath - extraction root, stripped from the logged path.
    #
    # Returns a hashtable with RemovedCount = number of matches removed.
    # The file is rewritten only when something was removed and the script is
    # not running in test mode.
    param(
        [System.IO.FileInfo]$XmlFile,
        [string[]]$Patterns,
        [string]$FilePrefix,
        [string]$UnzippedPath
    )

    $regexOptions = [System.Text.RegularExpressions.RegexOptions](
        [System.Text.RegularExpressions.RegexOptions]::IgnoreCase -bor
        [System.Text.RegularExpressions.RegexOptions]::Singleline)

    $txt = Get-Utf8Text $XmlFile.FullName
    $fileRemoved = 0

    foreach ($pat in $Patterns) {
        # Do not call this variable $matches: that name belongs to the
        # automatic variable filled by the -match operator.
        $found = [System.Text.RegularExpressions.Regex]::Matches($txt, $pat, $regexOptions)
        if ($found.Count -eq 0) {
            continue
        }

        Write-Verbose (" Nalezeno {0} vyskytov vzorem: {1}" -f $found.Count, $pat)
        foreach ($hit in $found) {
            Write-Verbose (" Odstranuji: {0}" -f $hit.Value.Trim())
        }

        $txt = [System.Text.RegularExpressions.Regex]::Replace($txt, $pat, "", $regexOptions)
        $fileRemoved += $found.Count
    }

    if ($fileRemoved -gt 0) {
        $relativePath = $XmlFile.FullName.Replace($UnzippedPath, '')
        if (-not $Test) {
            Set-Utf8Text $XmlFile.FullName $txt
        }
        Write-Verbose ("{0} {1} removed={2}" -f $FilePrefix, $relativePath, $fileRemoved)
    }

    return @{ RemovedCount = $fileRemoved }
}
# Zpracuje soubory komentaru specificke pro odstraneni casovych udaju
function Process-CommentFiles {
    # Removes date attributes from comment-related XML parts.
    #
    # Parameters:
    #   UnzippedPath - root of the extracted document.
    #   FileType     - "Word" scans word\comments.xml, every *comment*.xml
    #                  under word\ and word\document.xml; any other value
    #                  scans *comment*.xml under xl\.
    #
    # Returns @{ Changed = <files changed>; Removed = <attributes removed> }.
    # Files are rewritten only outside test mode.
    #
    # NOTE: in test mode nothing is written back, so attributes already
    # reported by Process-WordDocument for the same files are found (and
    # counted) here again.
    param([string]$UnzippedPath, [string]$FileType)

    if ($Test) {
        Write-Host "DEBUG: Zpracovavam komentare pro $FileType" -ForegroundColor Cyan
    }

    $changed = 0
    $totalRemoved = 0
    $candidates = @()

    if ($FileType -eq "Word") {
        $wordFolder = Join-Path $UnzippedPath "word"
        if ($Test) {
            Write-Host "DEBUG: Hledam komentare ve slozce: $wordFolder" -ForegroundColor Cyan
        }

        # Main comments part
        $mainComments = Join-Path $wordFolder "comments.xml"
        if (Test-Path -LiteralPath $mainComments) {
            $candidates += Get-Item -LiteralPath $mainComments
            if ($Test) {
                Write-Host "DEBUG: Nalezen hlavni soubor komentaru: comments.xml" -ForegroundColor Green
            }
        }

        # Comments stored in other parts (this wildcard also matches comments.xml)
        $additionalComments = Get-ChildItem -Path $wordFolder -Filter "*comment*.xml" -Recurse -File -ErrorAction SilentlyContinue
        if ($additionalComments) {
            $candidates += $additionalComments
            if ($Test) {
                Write-Host "DEBUG: Nalezeno $($additionalComments.Count) dalsich souboru s komentari" -ForegroundColor Green
            }
        }

        # document.xml is checked as well
        $docXml = Join-Path $wordFolder "document.xml"
        if (Test-Path -LiteralPath $docXml) {
            $candidates += Get-Item -LiteralPath $docXml
            if ($Test) {
                Write-Host "DEBUG: Pridavam document.xml pro kontrolu komentaru" -ForegroundColor Green
            }
        }
    } else {
        # Excel comments
        $xl = Join-Path $UnzippedPath "xl"
        $candidates = @(Get-ChildItem -Path $xl -Filter "*comment*.xml" -Recurse -File -ErrorAction SilentlyContinue)
    }

    # comments.xml is found both explicitly and by the wildcard search; keep
    # each file once so it is neither processed nor counted twice.
    $seen = @{}
    $commentFiles = @()
    foreach ($candidate in $candidates) {
        if (-not $seen.ContainsKey($candidate.FullName)) {
            $seen[$candidate.FullName] = $true
            $commentFiles += $candidate
        }
    }

    if ($Test) {
        Write-Host "DEBUG: Celkem souboru ke kontrole: $($commentFiles.Count)" -ForegroundColor Cyan
    }

    # One anchored pattern: leading whitespace, optional namespace prefix, the
    # exact attribute name "date", and a quoted value. Removing the whole
    # attribute (prefix included) keeps the XML well-formed; an unanchored
    # 'date="..."' would cut w:date="..." down to a dangling "w:" and would
    # also hit the tail of names such as w:fullDate. Author attributes are
    # never matched.
    $commentPatterns = @(
        '\s+(?:\w+:)?date\s*=\s*("[^"]*"|''[^'']*'')'
    )
    $regexOptions = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase

    foreach ($commentFile in $commentFiles) {
        if ($Test) {
            Write-Host "DEBUG: Zpracovavam komentare v: $($commentFile.Name)" -ForegroundColor Yellow
        }

        $txt = Get-Utf8Text $commentFile.FullName
        $fileRemoved = 0

        # Show the beginning of the content for debugging (test mode only)
        if ($Test) {
            Write-Host "DEBUG: Prvnich 200 znaku souboru:" -ForegroundColor Gray
            Write-Host ($txt.Substring(0, [Math]::Min(200, $txt.Length))) -ForegroundColor Gray
        }

        foreach ($pattern in $commentPatterns) {
            # Not named $matches: that is PowerShell's automatic variable.
            $found = [System.Text.RegularExpressions.Regex]::Matches($txt, $pattern, $regexOptions)
            if ($found.Count -eq 0) {
                continue
            }

            if ($Test) {
                Write-Host " DEBUG: Nalezeno $($found.Count) casovych udaju vzorem: $pattern" -ForegroundColor Green
                foreach ($hit in $found) {
                    Write-Host " DEBUG: Odstranuji: $($hit.Value.Trim())" -ForegroundColor Red
                }
            }

            # Remove the timestamp attributes completely
            $txt = [System.Text.RegularExpressions.Regex]::Replace($txt, $pattern, "", $regexOptions)
            $fileRemoved += $found.Count
        }

        if ($fileRemoved -gt 0) {
            if (-not $Test) {
                Set-Utf8Text $commentFile.FullName $txt
            }
            $changed++
            $totalRemoved += $fileRemoved
            if ($Test) {
                Write-Host "[C] $($commentFile.Name) removed=$fileRemoved" -ForegroundColor Green
            }
        } elseif ($Test) {
            Write-Host "DEBUG: V souboru $($commentFile.Name) nebyly nalezeny casove udaje" -ForegroundColor Gray
        }
    }

    return @{ Changed = $changed; Removed = $totalRemoved }
}
# Zpracuje metadata dokumentu a odstrani casove udaje
function Process-DocumentMetadata {
    # Removes everything matched by $MetadataPatterns from docProps/core.xml.
    #
    # Parameters:
    #   UnzippedPath - root of the extracted document.
    #
    # Returns @{ Changed = 0 or 1; Removed = <number of non-empty matches> }.
    # The file is rewritten only when its text actually changed and the script
    # is not running in test mode.
    param([string]$UnzippedPath)

    $changed = 0
    $totalRemoved = 0

    $regexOptions = [System.Text.RegularExpressions.RegexOptions](
        [System.Text.RegularExpressions.RegexOptions]::IgnoreCase -bor
        [System.Text.RegularExpressions.RegexOptions]::Singleline)

    # Core properties part (basic metadata); nested Join-Path avoids a
    # hard-coded path separator.
    $coreXml = Join-Path (Join-Path $UnzippedPath "docProps") "core.xml"
    if (Test-Path -LiteralPath $coreXml) {
        $txt = Get-Utf8Text $coreXml
        $originalTxt = $txt

        foreach ($pat in $MetadataPatterns) {
            # Zero-length matches remove nothing, so they must not be counted;
            # a pattern able to match the empty string would otherwise add one
            # "removal" per character of the file.
            # (The result is not stored in $matches - that name is reserved
            # for the automatic variable of the -match operator.)
            $found = @([System.Text.RegularExpressions.Regex]::Matches($txt, $pat, $regexOptions) |
                Where-Object { $_.Length -gt 0 })
            if ($found.Count -gt 0) {
                $txt = [System.Text.RegularExpressions.Regex]::Replace($txt, $pat, "", $regexOptions)
                $totalRemoved += $found.Count
            }
        }

        if ($txt -ne $originalTxt) {
            if (-not $Test) {
                Set-Utf8Text $coreXml $txt
            }
            $changed++
            Write-Verbose "[M] core.xml removed=$totalRemoved"
        }
    }

    return @{ Changed = $changed; Removed = $totalRemoved }
}
# Zpracuje Word dokument - najde a zpracuje vsechny XML soubory
function Process-WordDocument {
    # Runs Process-XmlFile with $WordAttrPatterns over every *.xml file found
    # recursively under <UnzippedPath>\word.
    # Throws when the "word" folder is missing.
    # Returns @{ Changed = <files with removals>; Removed = <total removals> }.
    param([string]$UnzippedPath)

    $wordFolder = Join-Path $UnzippedPath "word"
    if (-not (Test-Path $wordFolder)) {
        throw "Neplatna DOCX struktura (chybi /word slozka)"
    }

    $targets = Get-ChildItem -Path $wordFolder -Filter *.xml -Recurse -File -ErrorAction SilentlyContinue
    $summary = @{ Changed = 0; Removed = 0 }

    if ($Test) {
        Write-Host "DEBUG: Nalezeno $($targets.Count) XML souboru ve Word slozce" -ForegroundColor Cyan
    }

    foreach ($xmlFile in $targets) {
        if ($Test) {
            Write-Host "DEBUG: Zpracovavam: $($xmlFile.Name)" -ForegroundColor Gray
        }

        $outcome = Process-XmlFile -XmlFile $xmlFile -Patterns $WordAttrPatterns -FilePrefix "[W]" -UnzippedPath $UnzippedPath
        $removedHere = $outcome.RemovedCount

        # Files without any removal do not contribute to either counter.
        if ($removedHere -gt 0) {
            $summary.Changed += 1
            $summary.Removed += $removedHere
        }
    }

    return $summary
}
# Zpracuje Excel sešit - najde a zpracuje komentare a revize
function Process-ExcelWorkbook {
    # Runs Process-XmlFile with $ExcelAttrPatterns over the workbook's comment
    # parts (comments*.xml, threadedComments*.xml under xl\) and, when
    # $CleanExcelRevisions is set, over the *.xml files in xl\revisions.
    # Throws when the "xl" folder is missing.
    # Returns @{ Changed = <files with removals>; Removed = <total removals> }.
    param([string]$UnzippedPath)

    $xl = Join-Path $UnzippedPath "xl"
    if (-not (Test-Path $xl)) {
        throw "Neplatna XLSX struktura (chybi /xl slozka)"
    }

    # Legacy comments first, then threaded (modern) comments.
    $commentFiles = Get-ChildItem -Path $xl -Filter "comments*.xml" -Recurse -File -ErrorAction SilentlyContinue
    $threadedFiles = Get-ChildItem -Path $xl -Filter "threadedComments*.xml" -Recurse -File -ErrorAction SilentlyContinue
    $targets = @()
    if ($commentFiles) { $targets += $commentFiles }
    if ($threadedFiles) { $targets += $threadedFiles }

    # Work list: each entry pairs a log prefix with the files it applies to.
    $batches = @(
        @{ Prefix = "[X]"; Files = $targets }
    )

    # Optional: legacy revision logs
    if ($CleanExcelRevisions) {
        $revDir = Join-Path $xl "revisions"
        if (Test-Path $revDir) {
            $revFiles = Get-ChildItem -Path $revDir -Filter "*.xml" -File -ErrorAction SilentlyContinue
            $batches += @{ Prefix = "[XR]"; Files = $revFiles }
        }
    }

    $summary = @{ Changed = 0; Removed = 0 }
    foreach ($batch in $batches) {
        foreach ($xmlFile in $batch.Files) {
            $outcome = Process-XmlFile -XmlFile $xmlFile -Patterns $ExcelAttrPatterns -FilePrefix $batch.Prefix -UnzippedPath $UnzippedPath
            if ($outcome.RemovedCount -gt 0) {
                $summary.Changed += 1
                $summary.Removed += $outcome.RemovedCount
            }
        }
    }

    return $summary
}
# ---------------------------------------------------------------------------
# Main script body
# ---------------------------------------------------------------------------

# Locate the input file: first relative to the script's own directory, then
# exactly as given (relative to the current PowerShell location).
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$tryPath = Join-Path $scriptDir $InputFile
if ($Test) {
    Write-Host "DEBUG: Script dir: $scriptDir" -ForegroundColor Yellow
    Write-Host "DEBUG: Try path: $tryPath" -ForegroundColor Yellow
}

# Get-Item resolves relative paths against the PowerShell location, i.e. the
# same base Test-Path just used. [System.IO.Path]::GetFullPath would resolve
# against the process working directory, which can differ after Set-Location.
if (Test-Path -LiteralPath $tryPath -PathType Leaf) {
    $FullPath = (Get-Item -LiteralPath $tryPath).FullName
    if ($Test) {
        Write-Host "DEBUG: Soubor nalezen v try path: $FullPath" -ForegroundColor Green
    }
} elseif (Test-Path -LiteralPath $InputFile -PathType Leaf) {
    $FullPath = (Get-Item -LiteralPath $InputFile).FullName
    if ($Test) {
        Write-Host "DEBUG: Soubor nalezen v input path: $FullPath" -ForegroundColor Green
    }
} else {
    Write-Host "CHYBA: Soubor nebyl nalezen: $InputFile" -ForegroundColor Red
    if ($Test) {
        Write-Host "DEBUG: Zkusil jsem: $tryPath" -ForegroundColor Red
        Write-Host "DEBUG: A take: $InputFile" -ForegroundColor Red
    }
    exit 1
}

$Extension = [System.IO.Path]::GetExtension($FullPath).ToLowerInvariant()
if ($Test) {
    Write-Host "DEBUG: Extension: $Extension" -ForegroundColor Yellow
}
if ($Extension -notin $SupportedExtensions) {
    Write-Host "CHYBA: Podporovane jsou pouze: $($SupportedExtensions -join ', ')" -ForegroundColor Red
    exit 1
}
$isWord = $Extension -in @(".docx",".docm")

if ($Test -or $VerbosePreference -eq "Continue") {
    Write-Host "POZOR: Modifikace znevazni digitalni podpisy (pokud existuji)" -ForegroundColor Yellow
}
if ($Test) {
    Write-Host "TEST REZIM: Zadne zmeny nebudou provedeny" -ForegroundColor Cyan
}

$backupFile = New-VersionedBackup -FullPath $FullPath -BackupDirectory $BackupDir

Add-Type -AssemblyName System.IO.Compression.FileSystem -ErrorAction SilentlyContinue | Out-Null
Add-Type -AssemblyName System.IO.Compression -ErrorAction SilentlyContinue | Out-Null

$guid = [Guid]::NewGuid().ToString("N")
$workRoot = Join-Path ([System.IO.Path]::GetTempPath()) ("CleanTimestamps_" + $guid)
$unzipped = Join-Path $workRoot "unzipped"
if ($Test) {
    Write-Host "DEBUG: Work root: $workRoot" -ForegroundColor Yellow
    Write-Host "DEBUG: Unzipped path: $unzipped" -ForegroundColor Yellow
}

$results = @{ Changed = 0; Removed = 0 }
$metadataResults = @{ Changed = 0; Removed = 0 }
$commentResults = @{ Changed = 0; Removed = 0 }

# Everything that touches the temporary folder or the document runs inside
# try/catch/finally: any failure is reported once, the original file is left
# untouched unless a complete replacement archive exists, and the temporary
# folder is always removed (finally also runs when catch calls exit).
try {
    # The document is extracted in test mode too (its content must be inspected).
    New-Item -ItemType Directory -Path $workRoot,$unzipped -Force -ErrorAction Stop | Out-Null
    if ($Test) {
        Write-Host "DEBUG: Rozbaluji soubor..." -ForegroundColor Yellow
    }
    [System.IO.Compression.ZipFile]::ExtractToDirectory($FullPath, $unzipped)
    if ($Test) {
        Write-Host "DEBUG: Soubor rozbalen" -ForegroundColor Green
    }

    if (-not $Test) {
        $fileName = Split-Path $FullPath -Leaf
        Write-Host "Zpracovavam dokument: $fileName" -ForegroundColor Yellow
        if ((-not $isWord) -and $CleanExcelRevisions) {
            Write-Host "Vcetne revizi (historie bude ztracena)" -ForegroundColor Magenta
        }
    }

    # Process according to the file type
    if ($isWord) {
        # Standard processing of the XML parts
        if ($Test) {
            Write-Host "DEBUG: Spoustim standardni zpracovani Word dokumentu" -ForegroundColor Cyan
        }
        $results = Process-WordDocument -UnzippedPath $unzipped

        # Comment-specific processing
        if ($Test) {
            Write-Host "DEBUG: Spoustim specificke zpracovani komentaru" -ForegroundColor Cyan
        }
        $commentResults = Process-CommentFiles -UnzippedPath $unzipped -FileType "Word"

        # Document metadata
        if ($Test) {
            Write-Host "DEBUG: Spoustim zpracovani metadat" -ForegroundColor Cyan
        }
        $metadataResults = Process-DocumentMetadata -UnzippedPath $unzipped
    } elseif ($Extension -in @(".xlsx",".xlsm")) {
        $results = Process-ExcelWorkbook -UnzippedPath $unzipped
        $metadataResults = Process-DocumentMetadata -UnzippedPath $unzipped
    }

    # Repack (only outside test mode). The new archive is built completely
    # before the original is overwritten, so a failed compression can never
    # leave the user without the document.
    if (-not $Test) {
        $tmpZip = Join-Path $workRoot "packed.zip"
        [System.IO.Compression.ZipFile]::CreateFromDirectory($unzipped, $tmpZip, $CompressionLevel, $false)
        Copy-Item -LiteralPath $tmpZip -Destination $FullPath -Force -ErrorAction Stop
    }
} catch {
    Write-Host "Vysledek: CHYBA - $($_.Exception.Message)" -ForegroundColor Red
    exit 1
} finally {
    Remove-Item -LiteralPath $workRoot -Recurse -Force -ErrorAction SilentlyContinue
}

# Final report
$totalChanged = $results.Changed + $metadataResults.Changed + $commentResults.Changed
$totalRemoved = $results.Removed + $metadataResults.Removed + $commentResults.Removed

Write-Host ""
if ($Test) {
    Write-Host "=== VYSLEDKY ===" -ForegroundColor Magenta
    Write-Host ""
    Write-Host "TEST DOKONCEN: $FullPath" -ForegroundColor Cyan
    Write-Host "Pro skutecne provedeni spustte bez parametru -Test" -ForegroundColor Yellow

    # Show what would have happened
    if ($totalRemoved -gt 0) {
        Write-Host ""
        Write-Host "TEST - Co by bylo odstraneno:" -ForegroundColor Yellow
        Write-Host " Zmenene XML soubory: $totalChanged" -ForegroundColor White
        Write-Host " Odstranenych casovych atributu: $totalRemoved" -ForegroundColor White
        Write-Host " - z obsahu: $($results.Removed)" -ForegroundColor Gray
        Write-Host " - z komentaru: $($commentResults.Removed)" -ForegroundColor Gray
        Write-Host " - z metadat: $($metadataResults.Removed)" -ForegroundColor Gray
    } else {
        Write-Host ""
        Write-Host "TEST - Zadne casove atributy nebyly nalezeny" -ForegroundColor Gray
    }

    Write-Host "=== DEBUG END ===" -ForegroundColor Magenta
} else {
    Write-Host "=== VYSLEDKY ===" -ForegroundColor Green
    Write-Host "Zmenene XML soubory: $totalChanged" -ForegroundColor White
    Write-Host "Odstranenych casovych atributu: $totalRemoved" -ForegroundColor White
    Write-Host " - z obsahu: $($results.Removed)" -ForegroundColor Gray
    Write-Host " - z komentaru: $($commentResults.Removed)" -ForegroundColor Gray
    Write-Host " - z metadat: $($metadataResults.Removed)" -ForegroundColor Gray
    if ($totalRemoved -eq 0) {
        Write-Host "Vysledek: OK (zadne casove atributy nenalezeny)" -ForegroundColor Gray
    } else {
        Write-Host "Vysledek: OK" -ForegroundColor Green
    }
}