Advertisement
devinteske

Parsing CSV with quoted comma in awk

May 12th, 2018
1,820
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Awk 0.67 KB | None | 0 0
  1. #!/usr/bin/awk -f
  2. # Split every non-empty input line as CSV and print its fields as
  3. # col[0] .. col[n-1]. A field may be wrapped in double quotes, in which
  4. # case it may contain commas; an embedded quote is written as "" (or \").
  5. #
  6. # Sample 1: 3141,"Smith, John",$52000
  7. #   col[0] = 3141
  8. #   col[1] = Smith, John
  9. #   col[2] = $52000
  10. # Sample 2: 3141,"John ""The Rocket"" Smith",$52000
  11. #   col[0] = 3141
  12. #   col[1] = John "The Rocket" Smith
  13. #   col[2] = $52000
  14. # Sample 3: 7,"Smith, ""Rocket"" John",
  15. #   col[0] = 7
  16. #   col[1] = Smith, "Rocket" John
  17. #   col[2] =                      (empty trailing field is kept)
  18. #
  19. # The pattern is an explicit length test rather than `head = $0`: a record
  20. # such as 0 looks numeric, so the assignment would evaluate false and the
  21. # line would be skipped. Empty lines are still skipped.
  22. length($0) > 0 {
  23.     head = $0
  24.     split("", col) # empties the array; portable, unlike bare `delete col`
  25.     n = 0
  26.     # A field is either a quoted string (inside which "" is an escaped
  27.     # quote) or a run of non-commas, followed by a comma or end of line.
  28.     while (match(head, /^("([^"]|"")*"(,|$)|[^,]*(,|$))/)) {
  29.         item = substr(head, 1, RLENGTH)
  30.         head = substr(head, RLENGTH + 1)
  31.         more = sub(/,$/, "", item) # 1 if a separator followed this field
  32.         # Strip the wrapping quotes only when both of them are present.
  33.         if (item ~ /^".*"$/) item = substr(item, 2, length(item) - 2)
  34.         gsub(/""/, "\"", item) # "" -> "
  35.         gsub(/\\"/, "\"", item) # \" -> "
  36.         col[n++] = item
  37.         if (!more) break # that was the last field on the line
  38.         if (length(head) == 0) { # the line ended with a comma
  39.             col[n++] = "" # keep the trailing empty field
  40.             break
  41.         }
  42.     }
  43.     for (i = 0; i < n; i++) printf "col[%u] = %s\n", i, col[i]
  44. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement