# Python class to improve Unicode support for SQLite.

class SqliteUnicode:
    """Accent-aware text helpers intended to improve Unicode support in SQLite.

    The methods look like replacements for SQL functions and collations
    (``like``, ``regexp``, ``collate``, ``concat_ws``...); presumably they are
    registered on a connection by the caller -- confirm against the code that
    instantiates this class.

    Attributes:
        collation: base letter(s) -> string of variants folded onto that base.
        collation_ci: single letter -> every lower and upper case character
            considered equivalent to it (used to build regex classes).
        remove_accents: single character -> plain replacement text.
        reg_exp: cache of compiled patterns, keyed by the LIKE pattern.
        re1 .. re5: pre-compiled classes used by ``collate``.
    """

    # Characters with a special meaning in a regular expression; they must be
    # escaped when they appear literally in a LIKE pattern.
    _REGEX_SPECIAL = "\\.^$*+?{}[]|()"

    def __init__(self):
        self.collation = {
            u"a": u"àáâãäåą",
            u"e": u"eèéêëęε",
            u"i": u"iìíîï",
            u"o": u"òóôõöøő",
            u"u": u"uûü",
            u"c": u"cç",
            u"l": u"ł",
            u"n": u"ñńŋ",
            u"r": u"ř",
            u"s": u"sš",
            u"y": u"ýÿ",
            u"z": u"źżž",
            u"ae": u"æÆ",
            u"oe": u"œŒ",
        }

        self.collation_ci = {}
        self.reg_exp = {}
        self.remove_accents = {}

        for base, variants in self.collation.items():
            # Some entries list the base letter among its variants ("e") and
            # some do not ("a"). Always include a single-letter base so that
            # every plain letter gets the same accent-insensitive treatment.
            letters = variants
            if len(base) == 1 and base not in variants:
                letters = base + variants
            char_class = letters + letters.upper()
            for letter in letters:
                self.collation_ci[letter] = char_class

            # Only variants are mapped back to their base. Base letters must
            # not be mapped to their variant list, otherwise "a" would be
            # "replaced" by the whole accented string.
            for letter in variants:
                self.remove_accents[letter] = base

        # Remove some other characters.
        for letter in u"()[]<>…":
            self.remove_accents[letter] = ""

        # Regular expressions used by collate(), compiled once for speed.
        self.re1 = re.compile(u"[àâä]")
        self.re2 = re.compile(u"[éèêë]")
        self.re3 = re.compile(u"[îï]")
        self.re4 = re.compile(u"[ôö]")
        self.re5 = re.compile(u"[ûü]")

    def replace_accents(self, string):
        """Return ``string`` with accented characters replaced by plain ones.

        Every character found in ``self.remove_accents`` is replaced by its
        mapped text (for example a ligature becomes two letters and brackets
        are dropped); all other characters are kept unchanged.
        """
        lookup = self.remove_accents
        return "".join([lookup.get(letter, letter) for letter in string])

    def convert_to_regex(self, data):
        """Compile the LIKE pattern ``data`` into a cached regular expression.

        ``%`` matches any run of characters, letters known to the collation
        table match all their accented and upper case variants, and every
        other character matches itself. ``_`` is not treated as a wildcard.
        The expression must match the whole value unless the pattern ends
        with ``%``.

        Raises:
            re.error: if the generated expression cannot be compiled.
        """
        cached = self.reg_exp.get(data)
        if cached is not None:
            return cached

        parts = []
        for c in data:
            if c == "%":
                parts.append(".*")
            elif c in self._REGEX_SPECIAL:
                # Literal character that would otherwise be regex syntax.
                parts.append("\\" + c)
            else:
                # Look the letter up case-insensitively so that "E" gets the
                # same accent class as "e".
                variants = self.collation_ci.get(c.lower())
                if variants is None:
                    parts.append(c)
                else:
                    parts.append("[" + variants + "]")

        if not data.endswith("%"):
            # \Z anchors at the very end of the string; "$" would also accept
            # a match that stops just before a line break.
            parts.append("\\Z")
        reg_string = "".join(parts)

        try:
            # UNICODE makes IGNORECASE fold non-ASCII letters on Python 2.
            # DOTALL lets "%" span line breaks.
            comp_reg = re.compile(
                reg_string, re.IGNORECASE | re.DOTALL | re.UNICODE
            )
        except re.error:
            print("compile error %s" % reg_string)
            raise

        self.reg_exp[data] = comp_reg
        return comp_reg

    def like(self, a, b):
        """Return True if value ``b`` matches LIKE pattern ``a``.

        Matching is case- and accent-insensitive (see ``convert_to_regex``).
        Returns False when either argument is None or when matching fails
        with an exception.

        Original author's measurement on an 80000 row table: the built-in
        LIKE took 0.032 s, this one about 0.25 s (roughly 8x slower).
        """
        if a is None or b is None:
            return False
        try:
            return self.convert_to_regex(a).match(b) is not None
        except Exception:
            print("echec de like :  %s / %s" % (a, b))
            utils.printExcept()
            return False

    def like2(self, a, b):
        """Return True if ``b`` starts with pattern ``a``, ignoring accents.

        Both strings are lowercased and passed through ``replace_accents``;
        every ``%`` is then removed from the pattern and the result is
        compared with the beginning of ``b``. Returns False when either
        argument is None or when an exception occurs.
        """
        if a is None or b is None:
            return False
        try:
            a = self.replace_accents(a.lower())
            b = self.replace_accents(b.lower())
            a = a.replace("%", "")
            return a == b[0:len(a)]
        except Exception:
            print("echec de like :  %s / %s" % (a, b))
            utils.printExcept()
            return False

    def regexp(self, a, b):
        """Return True if regular expression ``a`` matches at the start of ``b``.

        Returns False when either argument is None.
        """
        if a is None or b is None:
            return False
        return re.match(a, b) is not None

    def _fold_for_collation(self, text):
        """Decode, lowercase and strip the accents handled by re1..re5."""
        if isinstance(text, bytes):
            text = text.decode("utf_8")
        text = text.lower()
        for pattern, plain in (
            (self.re1, "a"),
            (self.re2, "e"),
            (self.re3, "i"),
            (self.re4, "o"),
            (self.re5, "u"),
        ):
            text = pattern.sub(plain, text)
        return text

    def collate(self, string1, string2):
        """Compare two strings ignoring case and the most common accents.

        Byte strings are decoded as UTF-8; text is used as is.

        Returns:
            -1, 0 or 1 when ``string1`` sorts before, equal to or after
            ``string2``.
        """
        folded1 = self._fold_for_collation(string1)
        folded2 = self._fold_for_collation(string2)
        # Portable equivalent of the Python 2 built-in cmp().
        return (folded1 > folded2) - (folded1 < folded2)

    def concat_ws(self, *args):
        """Join the arguments after the first, using the first as separator.

        Arguments that are None or otherwise falsy (such as empty strings)
        are skipped. Raises IndexError when called without arguments.
        """
        separator = args[0]
        return separator.join([value for value in args[1:] if value])

    def clean_commas(self, data1):
        """Return the digit groups found in ``data1`` joined by commas.

        Falsy input (None or an empty string) is returned unchanged.
        """
        if not data1:
            return data1
        return ",".join(re.findall("[0-9]+", data1))

    def date2year(self, date1):
        """Return the third field of a ``d-m-y`` or ``d/m/y`` date.

        Anything after the first comma is discarded. When the remaining text
        does not split into exactly three fields it is returned as is. None
        is returned unchanged.
        """
        if date1 is None:
            return date1
        date1 = date1.split(",")[0]  # Delete all after a comma
        fields = date1.split("-")
        if len(fields) < 2:
            fields = date1.split("/")
        if len(fields) == 3:
            return fields[2]
        return date1

    def date2ymd(self, date1):
        """Convert a ``d-m-y`` or ``d/m/y`` date to ``y-m-d`` to allow sorting.

        One-digit days and months are padded with a leading zero and the
        original separator is kept. Input that does not split into exactly
        three fields, and None, are returned unchanged.
        """
        if date1 is None:
            return date1
        separator = "-"
        fields = date1.split(separator)
        if len(fields) < 3:
            separator = "/"
            fields = date1.split(separator)
        if len(fields) != 3:
            return date1
        day, month, year = fields
        if len(day) == 1:
            day = "0" + day
        if len(month) == 1:
            month = "0" + month
        return year + separator + month + separator + day