Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.46 KB | None | 0 0
  1. package word_count
  2.  
  // Character classes reported by Kind. The values are consecutive untyped
  // integer constants starting at zero.
  const (
  	// KindOther is the fallback class for any rune that matches none of the
  	// classes below.
  	KindOther = iota

  	// KindLatin marks the ASCII letters A-Z and a-z.
  	KindLatin

  	// KindNum marks the ASCII digits 0-9.
  	KindNum

  	// KindPun marks punctuation and symbol characters.
  	KindPun

  	// KindHan marks CJK unified ideographs.
  	KindHan
  )
  10.  
  11. func Kind(c rune) int {
  12. switch {
  13. // Chars in Basic Latin aka. ASCII
  14. case ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'):
  15. return KindLatin
  16.  
  17. case ('0' <= c && c <= '9'):
  18. return KindNum
  19.  
  20. // Synbols in Basic Latin aka. ASCII
  21. case ('\u0021' <= c && c <= '\u002F') || ('\u003A' <= c && c <= '\u0040') || ('\u005B' <= c && c <= '\u0060') || ('\u007B' <= c && c <= '\u007E'):
  22. return KindPun
  23.  
  24. // Latin-1 Punctuatio & Symbols
  25. case ('\u00A1' <= c && c <= '\u00BF'):
  26. return KindPun
  27.  
  28. // Unicode symbols
  29. case ('\u2013' <= c && c <= '\u201E'):
  30. return KindPun
  31.  
  32. // CJK Symbols and Punctuation
  33. case ('\u3001' <= c && c <= '\u303F') || c == '\u2026' || c == '\u2032' || c == '\u2033':
  34. return KindPun
  35.  
  36. // Halfwidth and Fullwidth Forms
  37. case ('\uFF01' <= c && c <= '\uFF0F') || ('\uFF1A' <= c && c <= '\uFF20') || ('\uFF3B' <= c && c <= '\uFF40') || ('\uFF5B' <= c && c <= '\uFF65'):
  38. return KindPun
  39.  
  40. // CJK Unified Ideographs block
  41. case ('\u4E00' <= c && c <= '\u62FF') || ('\u6300' <= c && c <= '\u77FF') || ('\u7800' <= c && c <= '\u8CFF') || ('\u8D00' <= c && c <= '\u9FCC'):
  42. return KindHan
  43.  
  44. // CJK Unified Ideographs block Ext A block
  45. case ('\u3400' <= c && c <= '\u4D85'):
  46. return KindHan
  47.  
  48. // CJKUI Ext B block
  49. // CJKUI Ext C block
  50. // CJKUI Ext D block
  51. // CJKUI Ext E block
  52. // block above will be ignored cause there's barely text font support them
  53.  
  54. default:
  55. return KindOther
  56. }
  57. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement