SHARE
TWEET

Untitled

a guest Jul 23rd, 2019 58 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. package word_count
  2.  
  // Character classes reported by Kind.
  const (
      KindOther = iota // anything not matched by one of the ranges in Kind
      KindLatin        // ASCII letters A-Z and a-z
      KindNum          // ASCII digits 0-9
      KindPun          // punctuation and symbols (ASCII, Latin-1, general, CJK, fullwidth)
      KindHan          // CJK unified ideographs in the Basic Multilingual Plane
  )

  // Kind classifies the rune c into one of the Kind* constants.
  //
  // Only the explicit code point ranges listed below are recognised; every
  // other rune (whitespace, control characters, accented Latin letters, kana,
  // ideographs outside the BMP, ...) is reported as KindOther.
  func Kind(c rune) int {
      // in reports whether c lies in the inclusive range [lo, hi].
      in := func(lo, hi rune) bool { return lo <= c && c <= hi }

      switch {
      // Letters in Basic Latin (ASCII).
      case in('A', 'Z') || in('a', 'z'):
          return KindLatin

      // Digits in Basic Latin (ASCII).
      case in('0', '9'):
          return KindNum

      // Symbols in Basic Latin (ASCII): every printable non-alphanumeric
      // character except the space.
      case in('\u0021', '\u002F') || in('\u003A', '\u0040') || in('\u005B', '\u0060') || in('\u007B', '\u007E'):
          return KindPun

      // Latin-1 Punctuation & Symbols.
      case in('\u00A1', '\u00BF'):
          return KindPun

      // General Punctuation: dashes and quotation marks, plus the horizontal
      // ellipsis (U+2026), prime (U+2032) and double prime (U+2033).
      case in('\u2013', '\u201E') || c == '\u2026' || c == '\u2032' || c == '\u2033':
          return KindPun

      // CJK Symbols and Punctuation. U+3000 (ideographic space) is left out
      // on purpose so that it is not counted as punctuation.
      case in('\u3001', '\u303F'):
          return KindPun

      // Halfwidth and Fullwidth Forms (punctuation only).
      case in('\uFF01', '\uFF0F') || in('\uFF1A', '\uFF20') || in('\uFF3B', '\uFF40') || in('\uFF5B', '\uFF65'):
          return KindPun

      // CJK Unified Ideographs block. The whole block U+4E00..U+9FFF is
      // accepted so that ideographs added after U+9FCC are not misclassified.
      case in('\u4E00', '\u9FFF'):
          return KindHan

      // CJK Unified Ideographs Extension A block (U+3400..U+4DBF). The upper
      // bound used to be U+4D85, which cut off the tail of the block.
      case in('\u3400', '\u4DBF'):
          return KindHan

      // CJK Unified Ideographs Extensions B, C, D and E live outside the BMP
      // and are deliberately ignored: hardly any text font supports them.

      default:
          return KindOther
      }
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top