# Untitled

# Lookup table for Unicode General Category values, built once at import time.
# Maps abbreviation -> (sortable long name, official long alias, description).
# One-letter keys (and 'LC') are groupings; their description lists the members.
_GENERAL_CATEGORIES = {
    'C': ('Other', 'Other',
          'Cc | Cf | Cn | Co | Cs'),
    'Cc': ('Control', 'Control',
           'a C0 or C1 control code'),  # a.k.a. cntrl
    'Cf': ('Format', 'Format',
           'a format control character'),
    'Cn': ('Unassigned', 'Unassigned',
           'a reserved unassigned code point or a noncharacter'),
    'Co': ('Private Use', 'Private_Use',
           'a private-use character'),
    'Cs': ('Surrogate', 'Surrogate',
           'a surrogate code point'),
    'L': ('Letter', 'Letter',
          'Ll | Lm | Lo | Lt | Lu'),
    'LC': ('Letter, Cased', 'Cased_Letter',
           'Ll | Lt | Lu'),
    'Ll': ('Letter, Lowercase', 'Lowercase_Letter',
           'a lowercase letter'),
    'Lm': ('Letter, Modifier', 'Modifier_Letter',
           'a modifier letter'),
    'Lo': ('Letter, Other', 'Other_Letter',
           'other letters, including syllables and ideographs'),
    'Lt': ('Letter, Titlecase', 'Titlecase_Letter',
           'a digraphic character, with first part uppercase'),
    'Lu': ('Letter, Uppercase', 'Uppercase_Letter',
           'an uppercase letter'),
    'M': ('Mark', 'Mark',
          'Mc | Me | Mn'),  # a.k.a. Combining_Mark
    'Mc': ('Mark, Spacing', 'Spacing_Mark',
           'a spacing combining mark (positive advance width)'),
    'Me': ('Mark, Enclosing', 'Enclosing_Mark',
           'an enclosing combining mark'),
    'Mn': ('Mark, Nonspacing', 'Nonspacing_Mark',
           'a nonspacing combining mark (zero advance width)'),
    'N': ('Number', 'Number',
          'Nd | Nl | No'),
    'Nd': ('Number, Decimal', 'Decimal_Number',
           'a decimal digit'),  # a.k.a. digit
    'Nl': ('Number, Letter', 'Letter_Number',
           'a letterlike numeric character'),
    'No': ('Number, Other', 'Other_Number',
           'a numeric character of other type'),
    'P': ('Punctuation', 'Punctuation',
          'Pc | Pd | Pe | Pf | Pi | Po | Ps'),  # a.k.a. punct
    'Pc': ('Punctuation, Connector', 'Connector_Punctuation',
           'a connecting punctuation mark, like a tie'),
    'Pd': ('Punctuation, Dash', 'Dash_Punctuation',
           'a dash or hyphen punctuation mark'),
    'Pe': ('Punctuation, Close', 'Close_Punctuation',
           'a closing punctuation mark (of a pair)'),
    'Pf': ('Punctuation, Final', 'Final_Punctuation',
           'a final quotation mark'),
    'Pi': ('Punctuation, Initial', 'Initial_Punctuation',
           'an initial quotation mark'),
    'Po': ('Punctuation, Other', 'Other_Punctuation',
           'a punctuation mark of other type'),
    'Ps': ('Punctuation, Open', 'Open_Punctuation',
           'an opening punctuation mark (of a pair)'),
    'S': ('Symbol', 'Symbol',
          'Sc | Sk | Sm | So'),
    'Sc': ('Symbol, Currency', 'Currency_Symbol',
           'a currency sign'),
    'Sk': ('Symbol, Modifier', 'Modifier_Symbol',
           'a non-letterlike modifier symbol'),
    'Sm': ('Symbol, Math', 'Math_Symbol',
           'a symbol of mathematical use'),
    'So': ('Symbol, Other', 'Other_Symbol',
           'a symbol of other type'),
    'Z': ('Separator', 'Separator',
          'Zl | Zp | Zs'),
    'Zl': ('Separator, Line', 'Line_Separator',
           'U+2028 LINE SEPARATOR only'),
    'Zp': ('Separator, Paragraph', 'Paragraph_Separator',
           'U+2029 PARAGRAPH SEPARATOR only'),
    'Zs': ('Separator, Space', 'Space_Separator',
           'a space character (of various non-zero widths)'),
}


def decodeUnicodeGeneralCategory(abbr, sortable=True, desc=False):
    """Decode a Unicode General Category (gc) code.

    Every Unicode code point has a set of properties. One property is called
    General Category and its value indicates whether the code point represents
    a letter, numeral, symbol et cetera.

    These values can be an abbreviation (abbr) or written out (long) and can
    have a description (desc). The one-letter abbreviations are groups of
    two-letter general category abbreviations with the same initial letter.
    These groups are never used as a property value for individual code
    points, as can be seen in
    http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
    for example.

    Sources for development and maintenance are:
    http://www.unicode.org/reports/tr18/#General_Category_Property
    http://www.unicode.org/reports/tr44/#General_Category_Values
    http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
    Especially the latter is important as it offers the latest definitions.
    Latest update is from Unicode version 6.2.0, 2012-08-14, 16:05:11 GMT [MD]

    The lookup is case-sensitive: 'Lu' is known, 'lu' and 'LU' are not.

    :param abbr: Unicode General Category code, e.g. 'Lu', 'Nd' or 'P'
    :param sortable: if true (and desc is false), return the alphabetically
        sortable long name such as 'Letter, Uppercase'; otherwise return the
        underscore-separated long name such as 'Uppercase_Letter'
    :param desc: if true, return the description instead of a long name;
        takes precedence over sortable
    :returns: long name or description for abbreviated Unicode General Category
    :raises ValueError: if abbr is not a known General Category abbreviation
    """
    if abbr not in _GENERAL_CATEGORIES:
        # ValueError is still an Exception, so callers that caught the
        # previous generic Exception keep working.
        raise ValueError(
            'Unknown general category abbreviation: %r' % (abbr,))

    sortable_name, long_name, description = _GENERAL_CATEGORIES[abbr]
    if desc:
        return description
    if sortable:
        return sortable_name
    return long_name