Advertisement
Old-Lost

Encoding Helpers

Jul 14th, 2017
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  <#
  .SYNOPSIS
  Converts files to the given encoding.

  .DESCRIPTION
  Recursively finds the files under -Path whose names match -Include, detects
  each file's current encoding with Get-FileEncoding, and rewrites every file
  whose detected encoding name differs from -Encoding. A one-line summary with
  the number of rewritten files is written to the host when done.

  .PARAMETER Include
  Wildcard pattern of the file names to convert, e.g. *.js.

  .PARAMETER Path
  Directory that is searched recursively.

  .PARAMETER Encoding
  Target encoding name, passed to Out-File -Encoding. Defaults to UTF8.
  It is also compared against the name returned by Get-FileEncoding, so a file
  is skipped only when the two names are equal.

  .EXAMPLE
  Convert-FileEncoding -Include *.js -Path scripts -Encoding UTF8
  #>
  function Convert-FileEncoding([string]$Include, [string]$Path, [string]$Encoding='UTF8') {
    $count = 0

    # Filter on $Include (the declared parameter). An undefined variable here
    # would expand to $null, which disables the filter and converts every file.
    Get-ChildItem -Include $Include -Recurse -Path $Path |
      # Directories can match the pattern too; they have no encoding to detect.
      Where-Object { -not $_.PSIsContainer } |
      Select-Object FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} |
      Where-Object { $_.Encoding -ne $Encoding } |
      ForEach-Object {
        # The parentheses force the whole file to be read before Out-File
        # reopens the same path for writing.
        (Get-Content $_.FullName) | Out-File $_.FullName -Encoding $Encoding
        $count++
      }

    Write-Host "$count $Include file(s) converted to $Encoding in $Path."
  }
  19.  
  20. # http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html
  <#
  .SYNOPSIS
  Gets file encoding.

  .DESCRIPTION
  The Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
  Only the first four bytes of the file are read. A file without a recognised
  BOM (including an empty file) is reported as 'ASCII'.
  Based on port of C# code from http://www.west-wind.com/Weblog/posts/197245.aspx

  .PARAMETER Path
  Path of the file to inspect. Can be bound from a pipeline object's Path property.

  .OUTPUTS
  System.String. One of: 'UTF8', 'Unicode UTF-16 Big-Endian',
  'Unicode UTF-16 Little-Endian', 'UTF32 Big-Endian', 'UTF32 Little-Endian',
  'UTF7', 'UTF-1', 'UTF-EBCDIC', 'SCSU', 'BOCU-1', 'GB-18030', 'ASCII'.

  .EXAMPLE
  Get-ChildItem  *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
  This command gets ps1 files in current directory where encoding is not ASCII

  .EXAMPLE
  Get-ChildItem  *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
  Same as previous example but fixes encoding using set-content

  .NOTES
  Modified by F.RICHARD August 2010
  add comment + more BOM
  http://unicode.org/faq/utf_bom.html
  http://en.wikipedia.org/wiki/Byte_order_mark

  Do this next line before or add function in Profile.ps1
  Import-Module .\Get-FileEncoding.ps1
  #>
  function Get-FileEncoding
  {
    [CmdletBinding()]
    Param (
      [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
      [string]$Path
    )

    # Read at most the first four bytes as one chunk. PowerShell 6+ replaced
    # "-Encoding Byte" with the -AsByteStream switch, so pick the right one.
    $readArgs = @{ Path = $Path; ReadCount = 4; TotalCount = 4 }
    if ($PSVersionTable.PSVersion.Major -ge 6) { $readArgs['AsByteStream'] = $true }
    else { $readArgs['Encoding'] = 'Byte' }
    [byte[]]$byte = Get-Content @readArgs

    # Render the bytes as an upper-case hex string (two digits per byte) so each
    # BOM is a simple anchored prefix test. Empty or short files then simply
    # fail to match instead of indexing past the end of the array.
    $hex = ''
    if ($null -ne $byte) {
      $hex = -join ($byte | ForEach-Object { $_.ToString('X2') })
    }

    # Order matters: the UTF-32 LE BOM (FF FE 00 00) begins with the UTF-16 LE
    # BOM (FF FE), so the four-byte marks are tested before the two-byte ones.
    switch -Regex ($hex) {
      # EF BB BF (UTF8)
      '^EFBBBF'   { Write-Output 'UTF8'; break }

      # 00 00 FE FF (UTF32 Big-Endian)
      '^0000FEFF' { Write-Output 'UTF32 Big-Endian'; break }

      # FF FE 00 00 (UTF32 Little-Endian)
      '^FFFE0000' { Write-Output 'UTF32 Little-Endian'; break }

      # FE FF (UTF-16 Big-Endian)
      '^FEFF'     { Write-Output 'Unicode UTF-16 Big-Endian'; break }

      # FF FE (UTF-16 Little-Endian)
      '^FFFE'     { Write-Output 'Unicode UTF-16 Little-Endian'; break }

      # 2B 2F 76 (38 | 39 | 2B | 2F) (UTF7)
      '^2B2F76(38|39|2B|2F)' { Write-Output 'UTF7'; break }

      # F7 64 4C (UTF-1)
      '^F7644C'   { Write-Output 'UTF-1'; break }

      # DD 73 66 73 (UTF-EBCDIC)
      '^DD736673' { Write-Output 'UTF-EBCDIC'; break }

      # 0E FE FF (SCSU)
      '^0EFEFF'   { Write-Output 'SCSU'; break }

      # FB EE 28 (BOCU-1)
      '^FBEE28'   { Write-Output 'BOCU-1'; break }

      # 84 31 95 33 (GB-18030)
      '^84319533' { Write-Output 'GB-18030'; break }

      # No recognised BOM.
      default     { Write-Output 'ASCII' }
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement