FatalBulletHit

String Finder (for large files)

Mar 3rd, 2018 (edited)
216
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  #################################################################################################################
  # String Finder (for large files) -- https://stackoverflow.com/a/49084759/9248774
  #
  # Counts how often a user-defined string occurs in $input_file. Works with files of any size: inputs
  # larger than $split_size are temporarily split into pieces before they are searched.
  #
  # Thanks @Bob (https://superuser.com/a/1295082/868077)
  #################################################################################################################

  # Directory that contains the input file, and the name of the input file inside that directory.
  $source     = "C:\adjust\path"
  $input_file = "file_name.extension"

  # The string to search for. Keep the quotation marks even when searching for digits only: without them
  # the value is a number, $pattern.Length is 1, and files larger than $split_size are no longer handled
  # correctly.
  $pattern = "Enter the string to search for in here"

  # Upper bound for the size of one temporary piece. Per the original author's note, Get-Content on an
  # input of 1GB or more causes System.OutOfMemoryExceptions, which is why large files are split up.
  $split_size = 100MB
  26.  
  # Enter the working directory, and stop the script if that fails. All later paths (including the
  # recursive removal of ".\_split") are relative, so continuing in whatever directory happens to be
  # current would read, create and delete files in the wrong place. -LiteralPath keeps characters such
  # as "[" or "]" in $source from being interpreted as wildcards.
  Set-Location -LiteralPath $source -ErrorAction Stop
  28.  
  29.  
  # Ask what to do with pieces left over from an earlier run.
  #
  # Writes: $overwrite - "true"  : the old ".\_split" directory was deleted, the input gets split again
  #                      "false" : the existing pieces are reused
  #                      $null   : no ".\_split" directory exists, nothing was asked
  #         $a         - status line that is shown to the user later on (or $null)
  #
  # Reset both first, so values left behind by a previous run in the same session cannot skip the prompt.
  $overwrite = $null
  $a = $null

  if (Test-Path ".\_split") {

      while ($overwrite -ne "true" -and $overwrite -ne "false") {

          "`n"
          # Keep the raw answer in its own variable. Storing it in $overwrite allowed a typed "true" or
          # "false" to end the loop although none of the branches below had run.
          $answer = Read-Host ' Splitted files already/still exist! Delete and overwrite?'

          # Decide on the first letter only (-match is case-insensitive). A plain substring test would
          # treat "cancel" as "no", because the word contains an "n".
          if ($answer -match '^\s*y') {

              # Fail loudly if the old pieces cannot be removed; splitting into a half-deleted directory
              # would otherwise break later when the piece files are created.
              Remove-Item .\_split -Force -Recurse -ErrorAction Stop
              $overwrite = "true"
              $a = "`n Deleted existing splitted files!"

          } elseif ($answer -match '^\s*n') {

              $overwrite = "false"
              $a = "`n Continuing with existing splitted files!"

          } elseif ($answer -match '^\s*c') {

              exit

          } else {

              Write-Host "`n Error: Invalid input!`n Type 'y' for 'yes'. Type 'n' for 'no'. Type 'c' for 'cancel'. `n`n`n"

          }

      }

  }

  Clear-Host
  63.  
  64.  
  # Count the occurrences of $pattern in $input_file and leave the total in $match_count.
  #
  # Reads : $input_file, $pattern, $split_size,
  #         $overwrite ("false" = reuse the pieces already present in ".\_split"), $a (status line)
  # Writes: $match_count, $delete ("true" = ".\_split" is to be removed afterwards), $b (status line)
  #
  # Files up to $split_size are searched in one go. Larger files are copied into pieces of at most
  # $split_size bytes (".\_split\_split_<n>.splt"); each piece is searched on its own, and the seam between
  # two neighbouring pieces is searched separately so that a hit cut in half by the split is still counted.
  #
  # NOTE(review): the split happens on byte offsets. With a multi-byte text encoding a character may be cut
  # in half at a seam -- confirm the encoding of the input if exact counts around seams matter.

  # Start from a clean slate, so values left behind by a previous run in the same session can neither skip
  # the prompt nor leak into the new total.
  $delete = $null
  $b = $null
  $match_count = 0

  # The search term is a plain string (the seam handling below relies on its literal length), so regex
  # metacharacters such as "." or "(" are escaped instead of being interpreted. The [string] cast also
  # makes a numeric $pattern behave like its textual form.
  $needle = [string]$pattern
  $search_regex = [regex]([regex]::Escape($needle))

  # Abort with a clear error when the input file is missing instead of silently reporting "not found".
  $input_item = Get-Item -LiteralPath $input_file -ErrorAction Stop

  if ($input_item.Length -gt $split_size) {

      while ($delete -ne "true" -and $delete -ne "false") {

          "`n"
          # The raw answer lives in its own variable; only a recognised answer sets $delete.
          $answer = Read-Host ' Delete splitted files afterwards?'

          # Decide on the first letter only, otherwise "cancel" counts as "no" (it contains an "n").
          if ($answer -match '^\s*y') {

              $delete = "true"
              $b = "`n Splitted files will be deleted afterwards!"

          } elseif ($answer -match '^\s*n') {

              $delete = "false"
              $b = "`n Splitted files will not be deleted afterwards!"

          } elseif ($answer -match '^\s*c') {

              exit

          } else {

              Write-Host "`n Error: Invalid input!`n Type 'y' for 'yes'. Type 'n' for 'no'. Type 'c' for 'cancel'. `n`n`n"

          }

      }

      Clear-Host

      $a
      $b

      Write-Host "`n This may take some time!"

      # Split the input unless the user chose to reuse the pieces of an earlier run.
      if ($overwrite -ne "false") {

          New-Item -ItemType Directory -Path ".\_split" -Force | Out-Null

          # .NET resolves relative paths against the process directory, not the PowerShell location.
          [Environment]::CurrentDirectory = (Get-Location).ProviderPath

          $bytes = New-Object byte[] 1MB
          $in_file = [System.IO.File]::OpenRead($input_item.FullName)

          try {

              $file_count = 0
              $finished = $false

              # Checking the stream position avoids creating an empty trailing piece when the file size
              # is an exact multiple of $split_size.
              while (-not $finished -and $in_file.Position -lt $in_file.Length) {

                  $file_count++
                  $bytes_to_read = $split_size
                  $out_file = New-Object System.IO.FileStream ".\_split\_split_$file_count.splt",CreateNew,Write,None

                  try {

                      while ($bytes_to_read -gt 0) {

                          $bytes_read = $in_file.Read($bytes, 0, [Math]::Min($bytes.Length, $bytes_to_read))

                          if ($bytes_read -le 0) {

                              # End of input reached.
                              $finished = $true
                              break

                          }

                          $bytes_to_read -= $bytes_read
                          $out_file.Write($bytes, 0, $bytes_read)

                      }

                  } finally {

                      # Release the piece even when reading or writing failed.
                      $out_file.Dispose()

                  }

              }

          } finally {

              $in_file.Dispose()

          }

      }

      # Number of characters on each side of a seam that can be part of a hit crossing that seam.
      $overlap = $needle.Length - 1
      $prev_tail = ""
      $i = 1

      while (Test-Path ".\_split\_split_$i.splt") {

          # -Raw returns the piece as ONE string. Without it Get-Content yields an array of lines, which
          # drops the line breaks and makes .Length / .Substring() operate on the array instead of the text.
          # The [string] cast turns the $null of an empty piece into "".
          $cur_file = [string](Get-Content -LiteralPath ".\_split\_split_$i.splt" -Raw)
          $match_count += $search_regex.Matches($cur_file).Count

          if ($i -gt 1 -and $overlap -gt 0) {

              # Both halves are shorter than the search string, so every hit found in their concatenation
              # necessarily crosses the seam and has not been counted in either piece.
              $head = $cur_file.Substring(0, [Math]::Min($overlap, $cur_file.Length))
              $match_count += $search_regex.Matches($prev_tail + $head).Count

          }

          # Only the tail is needed for the next seam; keeping the whole previous piece would double the
          # memory footprint.
          $prev_tail = $cur_file.Substring($cur_file.Length - [Math]::Min($overlap, $cur_file.Length))
          $i++

      }

  } else {

      $a

      # Search the CONTENT of the file. Passing $input_file itself would only search the file name.
      $match_count = $search_regex.Matches([string](Get-Content -LiteralPath $input_item.FullName -Raw)).Count

  }
  180.  
  181.  
  # Throw away the temporary pieces when the user asked for that.
  if ($delete -eq "true") { Remove-Item -Path ".\_split" -Recurse -Force }

  # Build the result message first, then print it in one place.
  $summary = if ($match_count -ge 1) {
      "`n`n String '$pattern' found:`n`n $match_count matches!"
  } else {
      "`n`n String '$pattern' not found!"
  }
  Write-Host $summary

  # Some trailing blank lines, then keep the window open until the user confirms.
  Write-Host "`n`n`n`n`n"

  Pause
Add Comment
Please, Sign In to add comment