Advertisement
pligor

GSM string normalizer

Feb 27th, 2014
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 2.29 KB | None | 0 0
  /**
   * Created with IntelliJ IDEA.
   * Developer: pligor
   * http://www.csoft.co.uk/sms/character_sets/gsm.htm
   *
   * Replaces Greek letters in a string with look-alike characters taken from the
   * GSM default alphabet (see the link above). Lower-case and accented Greek letters
   * are folded to an upper-case Latin look-alike when one exists (e.g. 'α' -> 'A'),
   * otherwise to the plain Greek capital that the GSM table itself contains
   * (e.g. 'γ' -> 'Γ'). Characters without a mapping are passed through unchanged.
   */
  object GSMnormalizer {

    /** Greek character -> replacement character; every replacement must be in `gsmSymbols`. */
    private val greekToGsm: Map[Char, Char] = Map(
      'α' -> 'A', 'ά' -> 'A', 'Α' -> 'A', 'Ά' -> 'A',
      'β' -> 'B', 'Β' -> 'B',
      'γ' -> 'Γ', // capital gamma is kept as is
      'δ' -> 'Δ', // capital delta is kept as is
      'ε' -> 'E', 'έ' -> 'E', 'Ε' -> 'E', 'Έ' -> 'E',
      'ζ' -> 'Z', 'Ζ' -> 'Z',
      'η' -> 'H', 'ή' -> 'H', 'Η' -> 'H', 'Ή' -> 'H',
      'θ' -> 'Θ', // capital theta is kept as is
      // capital iota with dialytika ('Ϊ') does exist, so it is mapped as well
      'ι' -> 'I', 'ί' -> 'I', 'Ι' -> 'I', 'Ί' -> 'I', 'ϊ' -> 'I', 'ΐ' -> 'I', 'Ϊ' -> 'I',
      'κ' -> 'K', 'Κ' -> 'K',
      'λ' -> 'Λ', // capital lambda is kept as is
      'μ' -> 'M', 'Μ' -> 'M',
      'ν' -> 'N', 'Ν' -> 'N',
      'ξ' -> 'Ξ', // capital xi is kept as is
      'ο' -> 'O', 'ό' -> 'O', 'Ο' -> 'O', 'Ό' -> 'O',
      'π' -> 'Π', // capital pi is kept as is
      'ρ' -> 'P', 'Ρ' -> 'P',
      'σ' -> 'Σ', 'ς' -> 'Σ', // capital sigma is kept as is
      'τ' -> 'T', 'Τ' -> 'T',
      'υ' -> 'Y', 'ύ' -> 'Y', 'Υ' -> 'Y', 'Ύ' -> 'Y', 'ϋ' -> 'Y', 'ΰ' -> 'Y', 'Ϋ' -> 'Y',
      'φ' -> 'Φ', // capital phi is kept as is
      'χ' -> 'X', 'Χ' -> 'X',
      'ψ' -> 'Ψ', // capital psi is kept as is
      // capital omega is kept as is; the accented capital 'Ώ' is folded onto it like
      // every other accented capital vowel above
      'ω' -> 'Ω', 'ώ' -> 'Ω', 'Ώ' -> 'Ω'
    )

    /**
     * The GSM default alphabet. Each source row holds the characters that share the
     * same low nibble of the code (0x0_, 0x1_, ... 0x7_ from left to right).
     * A Set is used because the table is only ever queried for membership.
     */
    private val gsmSymbols: Set[Char] = Set(
      '@', 'Δ', ' ', '0', '¡', 'P', '¿', 'p',
      '£', '_', '!', '1', 'A', 'Q', 'a', 'q',
      '$', 'Φ', '"', '2', 'B', 'R', 'b', 'r',
      '¥', 'Γ', '#', '3', 'C', 'S', 'c', 's',
      'è', 'Λ', '¤', '4', 'D', 'T', 'd', 't',
      'é', 'Ω', '%', '5', 'E', 'U', 'e', 'u',
      'ù', 'Π', '&', '6', 'F', 'V', 'f', 'v',
      'ì', 'Ψ', '\'', '7', 'G', 'W', 'g', 'w',
      'ò', 'Σ', '(', '8', 'H', 'X', 'h', 'x',
      'Ç', 'Θ', ')', '9', 'I', 'Y', 'i', 'y',
      '\n', 'Ξ', '*', ':', 'J', 'Z', 'j', 'z',
      'Ø', 27.toChar, '+', ';', 'K', 'Ä', 'k', 'ä', // 27 is the escape code
      'ø', 'Æ', ',', '<', 'L', 'Ö', 'l', 'ö',
      '\r', 'æ', '-', '=', 'M', 'Ñ', 'm', 'ñ',
      'Å', 'ß', '.', '>', 'N', 'Ü', 'n', 'ü',
      'å', 'É', '/', '?', 'O', '§', 'o', 'à'
    )

    // Sanity check evaluated once when the object is initialised: a replacement that
    // is not itself a GSM symbol would defeat the purpose of the normalizer.
    assert(greekToGsm.values.forall(gsmSymbols.contains),
      "all target characters must be gsm symbols")

    /**
     * Normalizes the Greek letters of `str`.
     *
     * @param str the text to normalize
     * @return a string of the same length in which every character that has a mapping
     *         is replaced by its GSM look-alike; all other characters are left untouched
     */
    def apply(str: String): String =
      str.map((c: Char) => greekToGsm.getOrElse(c, c))
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement