Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package locale
- import (
- "bufio"
- "fmt"
- "io"
- "regexp"
- "strconv"
- "strings"
- "unicode/utf8"
- )
// wordRegex splits a line into words. A word is one of:
//   - a run of characters that are neither whitespace nor quote characters,
//   - a double-quoted string, or
//   - a single-quoted string.
//
// Quoted matches keep their surrounding quotes and may contain whitespace.
// Escaped quote characters inside a quoted string are not recognised, and a
// quote character that is never closed is skipped rather than reported.
var wordRegex = regexp.MustCompile(`[^\s"']+|"([^"]*)"|'([^']*)'`)
- // Parse transforms a text-definition of a locale into a Def
- func Parse(in io.Reader) (def Def, err error) {
- s := bufio.NewScanner(in)
- commentChar := '#'
- escapeChar := '\\'
- lineNum := 0
- curCategory := ""
- // Catch any errors
- defer func() {
- if r := recover(); r != nil {
- def = Def{}
- err = fmt.Errorf("Line %d: %s", lineNum, r)
- }
- }()
- // A closure to simplify error handling
- die := func(format string, args ...interface{}) {
- panic(fmt.Errorf("line %d: %s", lineNum, fmt.Sprintf(format, args)))
- }
- // More closures to make assertions and parse input
- requireCategory := func(category, keyword string) {
- if category != curCategory {
- die("keyword %s is only applicable in category %s (current: '%s')", keyword, category, curCategory)
- }
- }
- requireString := func(val string) string {
- // TODO: Bug with escaped quote characters.
- if val[0] != '"' || val[len(val)-1] != '"' {
- die("Value '%s' needs to be a double-quoted string", val)
- }
- return val[1 : len(val)-1]
- }
- requireInt := func(val string) int {
- n, err := strconv.Atoi(val)
- if err != nil {
- die("%s is not an integer", val)
- }
- return n
- }
- requireChar := func(val string) string {
- if val[0] != '<' || val[len(val)-1] != '>' {
- die("character %s must be surrounded by angle brackets", val)
- }
- return val[1 : len(val)-1]
- }
- requireCharList := func(val string) []string {
- var out []string
- for _, s := range strings.Split(val, ";") {
- out = append(out, requireChar(s))
- }
- return out
- }
- requireCharPair := func(pair string) (first, second string) {
- if pair[0] != '(' || pair[len(pair)-1] != ')' {
- die("character pair '%s' not in parentheses", pair)
- }
- parts := strings.Split(pair[1:len(pair)-1], ",")
- if len(parts) != 2 {
- die("Need 2 elements in pair '%s'", pair)
- }
- return requireChar(parts[0]), requireChar(parts[1])
- }
- for s.Scan() {
- lineNum++
- line := s.Text()
- // Discard comment lines
- firstRune, _ := utf8.DecodeRuneInString(line)
- if firstRune == commentChar {
- continue
- }
- // Join newlines if they end with the escapeChar
- // TODO: This is very inefficient
- for {
- var lastRune rune
- for _, r := range line {
- lastRune = r
- }
- if lastRune != escapeChar {
- break
- }
- if lastRune == escapeChar {
- if !s.Scan() {
- die("Line continuation at end of file")
- return Def{}, fmt.Errorf("line continuation on line %d was not followed by another line", lineNum)
- }
- line += strings.TrimSpace(s.Text())
- lineNum++
- }
- }
- // TODO: Find more robust way to split
- words := wordRegex.FindAllString(line, -1)
- if len(words) == 0 {
- continue
- }
- if len(words) == 1 {
- // Only category declarations can have only 1 word
- switch words[0] {
- case "LC_CTYPE", "LC_COLLATE", "LC_MONETARY", "LC_NUMERIC", "LC_TIME", "LC_MESSAGES":
- curCategory = words[0]
- default:
- die("Unrecognized category name %s", words[0])
- }
- continue
- }
- // No line can have more than 2 words
- if len(words) > 2 {
- return Def{}, fmt.Errorf("line %d contains too many words: %s", lineNum, words)
- }
- // Logic!
- keyword, val := words[0], words[1]
- switch keyword {
- // Header blocks
- case "comment_char":
- if curCategory != "" {
- return Def{}, fmt.Errorf("line %d: unexpected %s during category %s", lineNum, keyword, curCategory)
- }
- commentChar, _ = utf8.DecodeRuneInString(words[1])
- case "escape_char":
- if curCategory != "" {
- return Def{}, fmt.Errorf("line %d: unexpected %s during category %s", lineNum, keyword, curCategory)
- }
- escapeChar, _ = utf8.DecodeRuneInString(words[1])
- // End block
- case "END":
- if val != curCategory {
- return Def{}, fmt.Errorf("line %d: tried to end category %s during %s", lineNum, val, curCategory)
- }
- curCategory = ""
- // Ctype values
- case "charclass":
- requireCategory("LC_CTYPE", keyword)
- for _, cc := range strings.Split(val, ";") {
- if _, in := def.ctype.other[cc]; !in {
- def.ctype.other[cc] = make([]string, 0)
- }
- }
- case "upper":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.upper = requireCharList(val)
- case "lower":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.lower = requireCharList(val)
- case "alpha":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.alpha = requireCharList(val)
- case "digit":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.digit = requireCharList(val)
- case "alnum":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.alnum = requireCharList(val)
- case "space":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.space = requireCharList(val)
- case "cntrl":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.cntrl = requireCharList(val)
- case "punct":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.punct = requireCharList(val)
- case "graph":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.graph = requireCharList(val)
- case "print":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.print = requireCharList(val)
- case "xdigit":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.xdigit = requireCharList(val)
- case "blank":
- requireCategory("LC_CTYPE", keyword)
- def.ctype.blank = requireCharList(val)
- case "toupper":
- for _, pair := range strings.Split(val, ";") {
- lower, upper := requireCharPair(pair)
- def.ctype.toupper[lower] = upper
- }
- case "tolower":
- for _, pair := range strings.Split(val, ";") {
- upper, lower := requireCharPair(pair)
- def.ctype.tolower[upper] = lower
- }
- // Monetary values
- case "int_curr_symbol":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intCurrSymbol = requireString(val)
- case "currency_symbol":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.currencySymbol = requireString(val)
- case "mon_decimal_point":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.monDecimalPoint = requireString(val)
- case "mon_thousands_sep":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.monThousandsSep = requireString(val)
- case "mon_grouping":
- requireCategory("LC_MONETARY", keyword)
- for _, s := range strings.Split(val, ";") {
- def.monetary.monGrouping = append(def.monetary.monGrouping, requireInt(s))
- }
- case "positive_sign":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.positiveSign = val
- case "negative_sign":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.negativeSign = val
- case "int_frac_digits":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intFracDigits = requireInt(val)
- case "frac_digits":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.fracDigits = requireInt(val)
- case "p_cs_precedes":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.pCsPrecedes = requireInt(val)
- case "p_sep_by_space":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.pSepBySpace = requireInt(val)
- case "n_cs_precedes":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.nCsPrecedes = requireInt(val)
- case "n_sep_by_space":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.nSepBySpace = requireInt(val)
- case "p_sign_posn":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.pSignPosn = requireInt(val)
- case "n_sign_posn":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.nSignPosn = requireInt(val)
- case "int_p_cs_precedes":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intPcsPrecedes = requireInt(val)
- case "int_p_sep_by_space":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intPsepBySpace = requireInt(val)
- case "int_n_cs_precedes":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intNcsPrecedes = requireInt(val)
- case "int_n_sep_by_space":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intNsepBySpace = requireInt(val)
- case "int_p_sign_posn":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intPsignPosn = requireInt(val)
- case "int_n_sign_posn":
- requireCategory("LC_MONETARY", keyword)
- def.monetary.intNsignPosn = requireInt(val)
- default:
- // Handle more complex values
- switch {
- case curCategory == "LC_CTYPE" && def.ctype.other[keyword] != nil:
- def.ctype.other[keyword] = append(def.ctype.other[keyword], val)
- default:
- die("misc error: %s", line)
- }
- }
- }
- if curCategory != "" {
- die("Category %s never ended", curCategory)
- }
- return def, nil
- }
Add Comment
Please, Sign In to add comment