Advertisement
Lee_Dailey

Parse academic info

May 1st, 2017
263
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Convert a space-aligned academic-info text report into a CSV file.
  #
  # Reads $SourceFile, drops its first two lines, rewrites every remaining line
  # (header line included) into comma-separated form, then round-trips the
  # result through ConvertFrom-Csv / Export-Csv into $DestFile.
  $SourceFile = 'full path to the file here\input-file.txt'
  $DestFile = $SourceFile.Replace('input-file.txt', 'output-file.csv')

  # Import and strip out the unwanted 1st & 2nd lines.
  # @() keeps the result an array even when only one line (or none) is left;
  # otherwise PowerShell unrolls a single line to a bare string and the indexed
  # assignment in the loop below fails.
  $InFile = @(Get-Content -Path $SourceFile | Select-Object -Skip 2)

  # Work on a real copy so the imported lines survive untouched in $InFile.
  # A plain assignment would only alias the same array, and every edit below
  # would overwrite the "backup" as well.
  $WorkingFile = $InFile.Clone()

  for ($Counter = 0; $Counter -lt $WorkingFile.Count; $Counter++)
      {
      $Line = $WorkingFile[$Counter]

      # Fix embedded spaces in the header line.
      $Line = $Line.Replace('T #', 'T_#').Replace('T N', 'T_N')

      # Give the four duplicate MINOR headers unique names.
      #    O = Optional, A = Additional
      $Line = $Line.Replace('MINOR MINOR MINOR MINOR', 'MINOR_O1 MINOR_O2 MINOR_A1 MINOR_A2')

      # Temporarily join "FName MiddleInitial" items with an underscore so the
      # space-to-comma step below does not split them.
      #    regex = [one or more word chars][one whitespace char][one word char][literal .][one or more whitespace chars]
      $Line = $Line -replace '(\w+)\s(\w\.\s+)', '$1_$2'

      # When nothing sits between the two matched items, insert five commas,
      # i.e. four empty fields (presumably the four MINOR columns - confirm
      # against real data). -replace leaves non-matching lines unchanged, so no
      # separate -match test is needed.
      #    regex = [four digits][one word char][one or more whitespace chars][one digit][literal .][two digits]
      # NOTE: lines that DO carry MINOR data are not handled specially; there
      # was no such data to test with.
      $Line = $Line -replace '(\d{4}\w)\s+(\d\.\d{2})', '$1,,,,,$2'

      # Collapse runs of whitespace into a single space.
      #    regex = [two or more whitespace chars]
      $Line = $Line -replace '\s{2,}', ' '

      # Trim leading/trailing whitespace, then turn each remaining space into
      # a comma.
      $Line = $Line.Trim().Replace(' ', ',')

      # Undo the temporary underscore for "FName MiddleInitial" items.
      #    regex = [one or more word chars][literal _][one word char][literal .][literal ,]
      $Line = $Line -replace '(\w+)_(\w\.\,)', '$1 $2'

      $WorkingFile[$Counter] = $Line
      }

  # The first remaining line supplies the CSV column names.
  $OutFile = $WorkingFile | ConvertFrom-Csv
  $OutFile | Export-Csv -Path $DestFile -NoTypeInformation
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement