Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<#
.SYNOPSIS
    cistis.ps1 - removes time information from Office documents while keeping the authors.

.PARAMETER InputFile
    The document to process. Mandatory; may be given as the first positional argument.

.PARAMETER BackupDir
    Optional directory for the backup copy. Defaults to an empty string.

.PARAMETER Test
    Switch that turns on test mode.
#>
param(
    [Parameter(Mandatory = $true, Position = 0)]
    [string] $InputFile,

    [string] $BackupDir = "",

    [switch] $Test
)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# When $true, the Excel revision log files (xl/revisions) are cleaned as well.
$CleanExcelRevisions = $true

# Test mode switches verbose output on. Outside test mode the preference is
# left alone so that an explicit -Verbose on the command line keeps working.
if ($Test) {
    $VerbosePreference = "Continue"
}

$DefaultBackupFolder = "zaloha"
$CompressionLevel = [System.IO.Compression.CompressionLevel]::Optimal
$XmlEncoding = [System.Text.Encoding]::UTF8

# Attribute patterns for Word documents (including revisions and comments).
# Every pattern is anchored on the whitespace that precedes the attribute and
# on the complete attribute name. An unanchored "date=" would also hit the tail
# of names such as w:fullDate and leave a broken attribute (w:full) behind.
# Author attributes are intentionally never part of any pattern.
$WordAttrPatterns = @(
    # date="..." with any (or no) namespace prefix: w:date, w14:date, mso:date, ...
    "\s+(?:[\w.-]+:)?date\s*=\s*("".*?""|'.*?')",
    "\s+w15:dateUtc\s*=\s*("".*?""|'.*?')",
    "\s+(?:w:)?modified\s*=\s*("".*?""|'.*?')",
    "\s+(?:w:)?created\s*=\s*("".*?""|'.*?')",
    "\s+lastModifiedTime\s*=\s*("".*?""|'.*?')"
)

# Attribute patterns for Excel documents.
# "dateTime" is included because revision headers carry their timestamp in an
# attribute of that name.
$ExcelAttrPatterns = @(
    "\s+(\w+:)?(date|dateUtc|dateTime|created|creationDate|modified|modifiedTime|createdTime|time|dttm|lastModifiedTime)\s*=\s*("".*?""|'.*?')",
    "\s+dt\s*=\s*("".*?""|'.*?')",
    "\s+uid\s*=\s*(""[^""]*\d{4}-\d{2}-\d{2}[^""]*""|'[^']*\d{4}-\d{2}-\d{2}[^']*')"
)

# Element patterns for the document metadata part (docProps/core.xml).
# The generic dcterms patterns accept any attributes on the element, so they
# also cover the xsi:type="dcterms:W3CDTF" form.
$MetadataPatterns = @(
    "<dcterms:created[^>]*>.*?</dcterms:created>",
    "<dcterms:modified[^>]*>.*?</dcterms:modified>",
    "<dc:created>.*?</dc:created>",
    "<dc:modified>.*?</dc:modified>"
)

$SupportedExtensions = @(".docx", ".docm", ".xlsx", ".xlsm")

if ($Test) {
    Write-Host "=== DEBUG START ===" -ForegroundColor Magenta
    Write-Host "Input file: $InputFile" -ForegroundColor Yellow
    Write-Host "Test mode: $Test" -ForegroundColor Yellow
    Write-Host "Verbose: $($VerbosePreference)" -ForegroundColor Yellow
}
# Reads the whole file at $FilePath as text, decoding it with the
# script-level $XmlEncoding, and returns the resulting string.
function Get-Utf8Text {
    param([string]$FilePath)

    $content = [System.IO.File]::ReadAllText($FilePath, $XmlEncoding)
    $content
}
# Writes $Content to $FilePath as UTF-8 without a byte order mark.
function Set-Utf8Text {
    param([string]$FilePath, [string]$Content)

    # The $false constructor argument suppresses the BOM.
    $utf8NoBom = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false
    [System.IO.File]::WriteAllText($FilePath, $Content, $utf8NoBom)
}
# Creates a timestamped backup copy of the input file and returns its path.
#
# Parameters:
#   FullPath        - path of the file to back up.
#   BackupDirectory - target directory; when empty, the $DefaultBackupFolder
#                     sub-folder next to the input file is used. A directory
#                     that does not exist yet is created.
# Returns:
#   The path of the backup file (in test mode: the path it would have).
# Errors:
#   Failing to create the directory or to copy the file is a terminating
#   error, so the caller never goes on to modify a file that has no backup.
# In test mode ($Test set by the script) nothing is created on disk.
function New-VersionedBackup {
    param([string]$FullPath, [string]$BackupDirectory = "")

    $sourceDir = Split-Path -Path $FullPath -Parent
    $baseName = [System.IO.Path]::GetFileNameWithoutExtension($FullPath)
    $extension = [System.IO.Path]::GetExtension($FullPath)

    # Honour an explicitly requested directory even if it does not exist yet.
    if ([string]::IsNullOrWhiteSpace($BackupDirectory)) {
        $targetDir = Join-Path $sourceDir $DefaultBackupFolder
    } else {
        $targetDir = $BackupDirectory
    }

    $stamp = (Get-Date).ToString("yyyyMMdd_HHmmss")
    $backupFile = Join-Path $targetDir ("{0}_{1}{2}" -f $baseName, $stamp, $extension)

    if ($Test) {
        # Test mode promises not to change anything, so only report the target.
        Write-Host "TEST: Zaloha by byla vytvorena: $backupFile" -ForegroundColor Green
        return $backupFile
    }

    if (-not (Test-Path -LiteralPath $targetDir -PathType Container)) {
        New-Item -ItemType Directory -Path $targetDir -Force -ErrorAction Stop | Out-Null
    }

    Write-Host "Vytvarim zalohu: $backupFile" -ForegroundColor Green
    Copy-Item -LiteralPath $FullPath -Destination $backupFile -Force -ErrorAction Stop
    return $backupFile
}
# Applies every regex in $Patterns to one XML file and deletes all matches.
#
# Parameters:
#   XmlFile      - the file to clean.
#   Patterns     - regular expressions; matching is case-insensitive and "."
#                  also matches line breaks.
#   FilePrefix   - tag used in the verbose summary line.
#   UnzippedPath - stripped from the file path in the verbose summary line.
# Returns:
#   A hashtable with the key RemovedCount (total number of matches removed).
# The file is written back only when something was removed and the script is
# not running in test mode ($Test).
function Process-XmlFile {
    param(
        [System.IO.FileInfo]$XmlFile,
        [string[]]$Patterns,
        [string]$FilePrefix,
        [string]$UnzippedPath
    )

    $regexOptions = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase -bor
        [System.Text.RegularExpressions.RegexOptions]::Singleline

    $content = Get-Utf8Text $XmlFile.FullName
    $removedTotal = 0

    foreach ($pattern in $Patterns) {
        $hits = [System.Text.RegularExpressions.Regex]::Matches($content, $pattern, $regexOptions)
        if ($hits.Count -le 0) {
            continue
        }

        Write-Verbose (" Nalezeno {0} vyskytov vzorem: {1}" -f $hits.Count, $pattern)
        foreach ($hit in $hits) {
            Write-Verbose (" Odstranuji: {0}" -f $hit.Value.Trim())
        }

        $content = [System.Text.RegularExpressions.Regex]::Replace($content, $pattern, "", $regexOptions)
        $removedTotal += $hits.Count
    }

    if ($removedTotal -gt 0) {
        $relativePath = $XmlFile.FullName.Replace($UnzippedPath, '')
        if (-not $Test) {
            Set-Utf8Text $XmlFile.FullName $content
        }
        Write-Verbose ("{0} {1} removed={2}" -f $FilePrefix, $relativePath, $removedTotal)
    }

    return @{ RemovedCount = $removedTotal }
}
# Removes date attributes from comment-related XML files.
#
# Parameters:
#   UnzippedPath - root of the extracted document.
#   FileType     - "Word" scans word\*comment*.xml plus word\document.xml;
#                  any other value scans xl\*comment*.xml.
# Returns:
#   A hashtable with Changed (number of files with removals) and Removed
#   (number of attributes removed).
# Notes:
#   - Each file is processed once, even when several lookups find it.
#   - Only complete attributes whose name is exactly "date" (with or without a
#     namespace prefix) are removed, together with their leading whitespace.
#     Author attributes and attributes that merely end in "date" (for example
#     w:fullDate) are left untouched.
#   - In test mode ($Test) nothing is written back, so attributes reported by
#     an earlier pass over the same file are reported again here.
function Process-CommentFiles {
    param([string]$UnzippedPath, [string]$FileType)

    if ($Test) {
        Write-Host "DEBUG: Zpracovavam komentare pro $FileType" -ForegroundColor Cyan
    }

    # Loop-invariant: one anchored pattern, e.g. w:date="2025-08-25T11:39:00Z".
    $commentPatterns = @(
        '\s+(?:[\w.-]+:)?date\s*=\s*("[^"]*"|''[^'']*'')'
    )
    $regexOptions = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase

    $candidates = @()
    if ($FileType -eq "Word") {
        $wordFolder = Join-Path $UnzippedPath "word"
        if ($Test) {
            Write-Host "DEBUG: Hledam komentare ve slozce: $wordFolder" -ForegroundColor Cyan
        }

        # Main comments part
        $mainComments = Join-Path $wordFolder "comments.xml"
        if (Test-Path -LiteralPath $mainComments) {
            $candidates += Get-Item -LiteralPath $mainComments
            if ($Test) {
                Write-Host "DEBUG: Nalezen hlavni soubor komentaru: comments.xml" -ForegroundColor Green
            }
        }

        # Further comment parts (this lookup finds comments.xml again; see de-duplication below)
        $additionalComments = Get-ChildItem -Path $wordFolder -Filter "*comment*.xml" -Recurse -File -ErrorAction SilentlyContinue
        if ($additionalComments) {
            $candidates += $additionalComments
            if ($Test) {
                Write-Host "DEBUG: Nalezeno $($additionalComments.Count) dalsich souboru s komentari" -ForegroundColor Green
            }
        }

        # document.xml is checked as well
        $docXml = Join-Path $wordFolder "document.xml"
        if (Test-Path -LiteralPath $docXml) {
            $candidates += Get-Item -LiteralPath $docXml
            if ($Test) {
                Write-Host "DEBUG: Pridavam document.xml pro kontrolu komentaru" -ForegroundColor Green
            }
        }
    } else {
        # Excel comments
        $xl = Join-Path $UnzippedPath "xl"
        $excelComments = Get-ChildItem -Path $xl -Filter "*comment*.xml" -Recurse -File -ErrorAction SilentlyContinue
        if ($excelComments) {
            $candidates += $excelComments
        }
    }

    # De-duplicate by full path so no file is processed (and counted) twice.
    $seenPaths = @{}
    $commentFiles = @()
    foreach ($candidate in $candidates) {
        if (-not $seenPaths.ContainsKey($candidate.FullName)) {
            $seenPaths[$candidate.FullName] = $true
            $commentFiles += $candidate
        }
    }

    if ($Test) {
        Write-Host "DEBUG: Celkem souboru ke kontrole: $($commentFiles.Count)" -ForegroundColor Cyan
    }

    $changed = 0
    $totalRemoved = 0
    foreach ($commentFile in $commentFiles) {
        if ($Test) {
            Write-Host "DEBUG: Zpracovavam komentare v: $($commentFile.Name)" -ForegroundColor Yellow
        }

        $txt = Get-Utf8Text $commentFile.FullName
        $fileRemoved = 0

        # Show the beginning of the file for debugging (test mode only)
        if ($Test) {
            Write-Host "DEBUG: Prvnich 200 znaku souboru:" -ForegroundColor Gray
            Write-Host ($txt.Substring(0, [Math]::Min(200, $txt.Length))) -ForegroundColor Gray
        }

        foreach ($pattern in $commentPatterns) {
            $found = [System.Text.RegularExpressions.Regex]::Matches($txt, $pattern, $regexOptions)
            if ($found.Count -eq 0) {
                continue
            }

            if ($Test) {
                Write-Host " DEBUG: Nalezeno $($found.Count) casovych udaju vzorem: $pattern" -ForegroundColor Green
                foreach ($hit in $found) {
                    Write-Host " DEBUG: Odstranuji: $($hit.Value)" -ForegroundColor Red
                }
            }

            # Drop the attribute completely
            $txt = [System.Text.RegularExpressions.Regex]::Replace($txt, $pattern, "", $regexOptions)
            $fileRemoved += $found.Count
        }

        if ($fileRemoved -gt 0) {
            if (-not $Test) {
                Set-Utf8Text $commentFile.FullName $txt
            }
            $changed++
            $totalRemoved += $fileRemoved
            if ($Test) {
                Write-Host "[C] $($commentFile.Name) removed=$fileRemoved" -ForegroundColor Green
            }
        } elseif ($Test) {
            Write-Host "DEBUG: V souboru $($commentFile.Name) nebyly nalezeny casove udaje" -ForegroundColor Gray
        }
    }

    return @{ Changed = $changed; Removed = $totalRemoved }
}
# Strips the elements matched by $MetadataPatterns from docProps\core.xml.
#
# Parameters:
#   UnzippedPath - root of the extracted document.
# Returns:
#   A hashtable with Changed (0 or 1) and Removed (number of matches removed).
# The file is rewritten only when its text changed and the script is not in
# test mode ($Test). A missing core.xml yields zero counts.
function Process-DocumentMetadata {
    param([string]$UnzippedPath)

    $filesChanged = 0
    $elementsRemoved = 0

    $corePath = Join-Path $UnzippedPath "docProps\core.xml"
    if (-not (Test-Path $corePath)) {
        return @{ Changed = $filesChanged; Removed = $elementsRemoved }
    }

    $regexOptions = [System.Text.RegularExpressions.RegexOptions]::IgnoreCase -bor
        [System.Text.RegularExpressions.RegexOptions]::Singleline

    $before = Get-Utf8Text $corePath
    $after = $before

    foreach ($pattern in $MetadataPatterns) {
        $hits = [System.Text.RegularExpressions.Regex]::Matches($after, $pattern, $regexOptions)
        if ($hits.Count -le 0) {
            continue
        }
        $after = [System.Text.RegularExpressions.Regex]::Replace($after, $pattern, "", $regexOptions)
        $elementsRemoved += $hits.Count
    }

    if ($after -ne $before) {
        if (-not $Test) {
            Set-Utf8Text $corePath $after
        }
        $filesChanged++
        Write-Verbose "[M] core.xml removed=$elementsRemoved"
    }

    return @{ Changed = $filesChanged; Removed = $elementsRemoved }
}
# Runs Process-XmlFile with $WordAttrPatterns over every *.xml file found
# (recursively) under the "word" folder of the extracted document.
#
# Parameters:
#   UnzippedPath - root of the extracted document.
# Returns:
#   A hashtable with Changed (files with removals) and Removed (total removals).
# Throws when the "word" folder does not exist.
function Process-WordDocument {
    param([string]$UnzippedPath)

    $wordDir = Join-Path $UnzippedPath "word"
    if (-not (Test-Path $wordDir)) {
        throw "Neplatna DOCX struktura (chybi /word slozka)"
    }

    $xmlParts = Get-ChildItem -Path $wordDir -Filter *.xml -Recurse -File -ErrorAction SilentlyContinue
    if ($Test) {
        Write-Host "DEBUG: Nalezeno $($xmlParts.Count) XML souboru ve Word slozce" -ForegroundColor Cyan
    }

    $filesTouched = 0
    $attrsRemoved = 0
    foreach ($part in $xmlParts) {
        if ($Test) {
            Write-Host "DEBUG: Zpracovavam: $($part.Name)" -ForegroundColor Gray
        }

        $outcome = Process-XmlFile -XmlFile $part -Patterns $WordAttrPatterns -FilePrefix "[W]" -UnzippedPath $UnzippedPath
        if ($outcome.RemovedCount -le 0) {
            continue
        }
        $filesTouched++
        $attrsRemoved += $outcome.RemovedCount
    }

    return @{ Changed = $filesTouched; Removed = $attrsRemoved }
}
# Cleans the comment files of an extracted Excel workbook and, when
# $CleanExcelRevisions is set, the files in xl\revisions as well.
#
# Parameters:
#   UnzippedPath - root of the extracted workbook.
# Returns:
#   A hashtable with Changed (files with removals) and Removed (total removals).
# Throws when the "xl" folder does not exist.
function Process-ExcelWorkbook {
    param([string]$UnzippedPath)

    $xlDir = Join-Path $UnzippedPath "xl"
    if (-not (Test-Path $xlDir)) {
        throw "Neplatna XLSX struktura (chybi /xl slozka)"
    }

    # Work list in processing order: legacy comments, threaded comments, then revisions.
    $workGroups = @()

    $commentParts = @()
    foreach ($filter in @("comments*.xml", "threadedComments*.xml")) {
        $found = Get-ChildItem -Path $xlDir -Filter $filter -Recurse -File -ErrorAction SilentlyContinue
        if ($found) {
            $commentParts += $found
        }
    }
    $workGroups += @{ Prefix = "[X]"; Files = $commentParts }

    # Optional: legacy revision log (top level of xl\revisions only)
    if ($CleanExcelRevisions) {
        $revisionDir = Join-Path $xlDir "revisions"
        if (Test-Path $revisionDir) {
            $revisionParts = Get-ChildItem -Path $revisionDir -Filter "*.xml" -File -ErrorAction SilentlyContinue
            $workGroups += @{ Prefix = "[XR]"; Files = $revisionParts }
        }
    }

    $filesTouched = 0
    $attrsRemoved = 0
    foreach ($group in $workGroups) {
        foreach ($part in $group.Files) {
            $outcome = Process-XmlFile -XmlFile $part -Patterns $ExcelAttrPatterns -FilePrefix $group.Prefix -UnzippedPath $UnzippedPath
            if ($outcome.RemovedCount -gt 0) {
                $filesTouched++
                $attrsRemoved += $outcome.RemovedCount
            }
        }
    }

    return @{ Changed = $filesTouched; Removed = $attrsRemoved }
}
# ---------------------------------------------------------------------------
# Locate the input file: first relative to the script's own directory, then as
# given (relative to the current PowerShell location).
# ---------------------------------------------------------------------------
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path

# A rooted input must not be glued onto the script directory.
$inputIsRooted = $false
try {
    $inputIsRooted = [System.IO.Path]::IsPathRooted($InputFile)
} catch {
    # Illegal path characters: treat as not rooted; the lookups below report the failure.
    $inputIsRooted = $false
}
if ($inputIsRooted) {
    $tryPath = $InputFile
} else {
    $tryPath = Join-Path $scriptDir $InputFile
}

if ($Test) {
    Write-Host "DEBUG: Script dir: $scriptDir" -ForegroundColor Yellow
    Write-Host "DEBUG: Try path: $tryPath" -ForegroundColor Yellow
}

# Resolve-Path works from the PowerShell location, the same one Test-Path uses.
# [System.IO.Path]::GetFullPath would use the process working directory instead,
# which can point somewhere else after Set-Location.
if (Test-Path -LiteralPath $tryPath -PathType Leaf) {
    $FullPath = (Resolve-Path -LiteralPath $tryPath).ProviderPath
    if ($Test) {
        Write-Host "DEBUG: Soubor nalezen v try path: $FullPath" -ForegroundColor Green
    }
} elseif (Test-Path -LiteralPath $InputFile -PathType Leaf) {
    $FullPath = (Resolve-Path -LiteralPath $InputFile).ProviderPath
    if ($Test) {
        Write-Host "DEBUG: Soubor nalezen v input path: $FullPath" -ForegroundColor Green
    }
} else {
    Write-Host "CHYBA: Soubor nebyl nalezen: $InputFile" -ForegroundColor Red
    if ($Test) {
        Write-Host "DEBUG: Zkusil jsem: $tryPath" -ForegroundColor Red
        Write-Host "DEBUG: A take: $InputFile" -ForegroundColor Red
    }
    exit 1
}

# Only the supported Office formats are accepted.
$Extension = [System.IO.Path]::GetExtension($FullPath).ToLowerInvariant()
if ($Test) {
    Write-Host "DEBUG: Extension: $Extension" -ForegroundColor Yellow
}
if ($Extension -notin $SupportedExtensions) {
    Write-Host "CHYBA: Podporovane jsou pouze: $($SupportedExtensions -join ', ')" -ForegroundColor Red
    exit 1
}

if ($Test -or $VerbosePreference -eq "Continue") {
    Write-Host "POZOR: Modifikace znevazni digitalni podpisy (pokud existuji)" -ForegroundColor Yellow
}
if ($Test) {
    Write-Host "TEST REZIM: Zadne zmeny nebudou provedeny" -ForegroundColor Cyan
}
# ---------------------------------------------------------------------------
# Back up, extract, clean and (outside test mode) repack the document.
# ---------------------------------------------------------------------------
$backupFile = New-VersionedBackup -FullPath $FullPath -BackupDirectory $BackupDir

Add-Type -AssemblyName System.IO.Compression.FileSystem -ErrorAction SilentlyContinue | Out-Null
Add-Type -AssemblyName System.IO.Compression -ErrorAction SilentlyContinue | Out-Null

$guid = [Guid]::NewGuid().ToString("N")
$workRoot = Join-Path $env:TEMP ("CleanTimestamps_" + $guid)
$unzipped = Join-Path $workRoot "unzipped"
if ($Test) {
    Write-Host "DEBUG: Work root: $workRoot" -ForegroundColor Yellow
    Write-Host "DEBUG: Unzipped path: $unzipped" -ForegroundColor Yellow
}

# Per-category results; each holds Changed (files) and Removed (attributes).
$results = @{ Changed = 0; Removed = 0 }
$metadataResults = @{ Changed = 0; Removed = 0 }
$commentResults = @{ Changed = 0; Removed = 0 }

# Everything that can fail runs inside try/catch/finally so that
#   - a failure stops processing instead of continuing with partial state,
#   - the temporary directory is removed on every path,
#   - the original file is only replaced after the new archive exists.
$processingError = $null
try {
    # The archive is extracted in test mode too, because the content has to be inspected.
    New-Item -ItemType Directory -Path $workRoot, $unzipped -Force -ErrorAction Stop | Out-Null
    if ($Test) {
        Write-Host "DEBUG: Rozbaluji soubor..." -ForegroundColor Yellow
    }
    [System.IO.Compression.ZipFile]::ExtractToDirectory($FullPath, $unzipped)
    if ($Test) {
        Write-Host "DEBUG: Soubor rozbalen" -ForegroundColor Green
    }

    $isWord = $Extension -in @(".docx", ".docm")
    $isExcel = $Extension -in @(".xlsx", ".xlsm")

    if (-not $Test) {
        $fileName = Split-Path $FullPath -Leaf
        Write-Host "Zpracovavam dokument: $fileName" -ForegroundColor Yellow
        if ((-not $isWord) -and $CleanExcelRevisions) {
            Write-Host "Vcetne revizi (historie bude ztracena)" -ForegroundColor Magenta
        }
    }

    # Process according to the file type
    if ($isWord) {
        # Standard pass over all XML parts
        if ($Test) {
            Write-Host "DEBUG: Spoustim standardni zpracovani Word dokumentu" -ForegroundColor Cyan
        }
        $results = Process-WordDocument -UnzippedPath $unzipped

        # Comment-specific pass
        if ($Test) {
            Write-Host "DEBUG: Spoustim specificke zpracovani komentaru" -ForegroundColor Cyan
        }
        $commentResults = Process-CommentFiles -UnzippedPath $unzipped -FileType "Word"

        # Document metadata
        if ($Test) {
            Write-Host "DEBUG: Spoustim zpracovani metadat" -ForegroundColor Cyan
        }
        $metadataResults = Process-DocumentMetadata -UnzippedPath $unzipped
    } elseif ($isExcel) {
        $results = Process-ExcelWorkbook -UnzippedPath $unzipped
        $metadataResults = Process-DocumentMetadata -UnzippedPath $unzipped
    }

    # Repack (not in test mode). Build the new archive first, then overwrite
    # the original in a single copy; the original is never deleted up front.
    if (-not $Test) {
        $tmpZip = Join-Path $workRoot "packed.zip"
        [System.IO.Compression.ZipFile]::CreateFromDirectory($unzipped, $tmpZip, $CompressionLevel, $false)
        [System.IO.File]::Copy($tmpZip, $FullPath, $true)
    }
} catch {
    $processingError = $_
} finally {
    Remove-Item -LiteralPath $workRoot -Recurse -Force -ErrorAction SilentlyContinue
}

if ($processingError) {
    Write-Host "CHYBA: Zpracovani selhalo: $($processingError.Exception.Message)" -ForegroundColor Red
    exit 1
}
# ---------------------------------------------------------------------------
# Final report
# ---------------------------------------------------------------------------
# Totals across content, comment and metadata processing, computed once for
# both output modes. Only output statements follow, so no error handling is
# wrapped around them.
$totalChanged = $results.Changed + $metadataResults.Changed + $commentResults.Changed
$totalRemoved = $results.Removed + $metadataResults.Removed + $commentResults.Removed

Write-Host ""
if ($Test) {
    Write-Host "=== VYSLEDKY ===" -ForegroundColor Magenta
    Write-Host ""
    Write-Host "TEST DOKONCEN: $FullPath" -ForegroundColor Cyan
    Write-Host "Pro skutecne provedeni spustte bez parametru -Test" -ForegroundColor Yellow

    # Show what a real run would remove
    Write-Host ""
    if ($totalRemoved -gt 0) {
        Write-Host "TEST - Co by bylo odstraneno:" -ForegroundColor Yellow
        Write-Host " Zmenene XML soubory: $totalChanged" -ForegroundColor White
        Write-Host " Odstranenych casovych atributu: $totalRemoved" -ForegroundColor White
        Write-Host " - z obsahu: $($results.Removed)" -ForegroundColor Gray
        Write-Host " - z komentaru: $($commentResults.Removed)" -ForegroundColor Gray
        Write-Host " - z metadat: $($metadataResults.Removed)" -ForegroundColor Gray
    } else {
        Write-Host "TEST - Zadne casove atributy nebyly nalezeny" -ForegroundColor Gray
    }

    Write-Host "=== DEBUG END ===" -ForegroundColor Magenta
} else {
    Write-Host "=== VYSLEDKY ===" -ForegroundColor Green
    Write-Host "Zmenene XML soubory: $totalChanged" -ForegroundColor White
    Write-Host "Odstranenych casovych atributu: $totalRemoved" -ForegroundColor White
    Write-Host " - z obsahu: $($results.Removed)" -ForegroundColor Gray
    Write-Host " - z komentaru: $($commentResults.Removed)" -ForegroundColor Gray
    Write-Host " - z metadat: $($metadataResults.Removed)" -ForegroundColor Gray
    if ($totalRemoved -eq 0) {
        Write-Host "Vysledek: OK (zadne casove atributy nenalezeny)" -ForegroundColor Gray
    } else {
        Write-Host "Vysledek: OK" -ForegroundColor Green
    }
}
Advertisement
Add Comment
Please, Sign In to add comment