Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package word_count
// Character classes reported by Kind. The values are assigned by iota, so the
// order of this list is part of the public contract and must not change.
const (
	KindOther = iota // anything not matched by a more specific class below
	KindLatin        // ASCII letters A-Z and a-z
	KindNum          // ASCII digits 0-9
	KindPun          // punctuation and symbols (ASCII, Latin-1, general, CJK, fullwidth)
	KindHan          // CJK ideographs in the basic block and Extension A
)

// Kind classifies the rune c and returns one of the Kind* constants.
//
// Only the explicit code point ranges listed in the switch are recognised;
// every other rune (including whitespace, non-ASCII letters, fullwidth digits
// and ideographs outside the two Han ranges below) yields KindOther.
func Kind(c rune) int {
	switch {
	// Letters in Basic Latin (ASCII).
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
		return KindLatin

	// Digits in Basic Latin (ASCII).
	case '0' <= c && c <= '9':
		return KindNum

	// Symbols in Basic Latin (ASCII): the four runs of printable
	// non-alphanumeric characters between '!' and '~'.
	case '\u0021' <= c && c <= '\u002F',
		'\u003A' <= c && c <= '\u0040',
		'\u005B' <= c && c <= '\u0060',
		'\u007B' <= c && c <= '\u007E':
		return KindPun

	// Latin-1 punctuation and symbols.
	case '\u00A1' <= c && c <= '\u00BF':
		return KindPun

	// General punctuation: dashes and quotation marks (U+2013..U+201E),
	// plus the horizontal ellipsis, prime and double prime.
	case '\u2013' <= c && c <= '\u201E',
		c == '\u2026', c == '\u2032', c == '\u2033':
		return KindPun

	// CJK Symbols and Punctuation. The range starts at U+3001, so the
	// ideographic space U+3000 is not treated as punctuation.
	case '\u3001' <= c && c <= '\u303F':
		return KindPun

	// Halfwidth and Fullwidth Forms: the fullwidth counterparts of the ASCII
	// symbol runs, extended through the halfwidth CJK punctuation (U+FF65).
	case '\uFF01' <= c && c <= '\uFF0F',
		'\uFF1A' <= c && c <= '\uFF20',
		'\uFF3B' <= c && c <= '\uFF40',
		'\uFF5B' <= c && c <= '\uFF65':
		return KindPun

	// CJK Unified Ideographs block, up to U+9FCC.
	case '\u4E00' <= c && c <= '\u9FCC':
		return KindHan

	// CJK Unified Ideographs Extension A. The block runs through U+4DBF; an
	// upper bound of U+4D85 would drop the ideographs from U+4D86 onwards
	// into KindOther.
	case '\u3400' <= c && c <= '\u4DBF':
		return KindHan

	// CJK Unified Ideographs Extensions B, C, D and E are intentionally not
	// handled because hardly any text fonts support them.
	default:
		return KindOther
	}
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement