Advertisement
Guest User

Untitled

a guest
Oct 29th, 2014
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 6.83 KB | None | 0 0
  1. package forcomp
  2.  
  3. object Anagrams {
  4.  
  5.   /** A word is simply a `String`. */
  6.   type Word = String
  7.  
  8.   /** A sentence is a `List` of words. */
  9.   type Sentence = List[Word]
  10.  
  11.   /** `Occurrences` is a `List` of pairs of characters and positive integers saying
  12.     * how often the character appears.
  13.     * This list is sorted alphabetically w.r.t. to the character in each pair.
  14.     * All characters in the occurrence list are lowercase.
  15.     *
  16.     * Any list of pairs of lowercase characters and their frequency which is not sorted
  17.     * is **not** an occurrence list.
  18.     *
  19.     * Note: If the frequency of some character is zero, then that character should not be
  20.     * in the list.
  21.     */
  22.   type Occurrences = List[(Char, Int)]
  23.  
  24.   /** The dictionary is simply a sequence of words.
  25.     * It is predefined and obtained as a sequence using the utility method `loadDictionary`.
  26.     */
  27.   val dictionary: List[Word] = loadDictionary
  28.  
  29.   /** Converts the word into its character occurence list.
  30.     *
  31.     * Note: the uppercase and lowercase version of the character are treated as the
  32.     * same character, and are represented as a lowercase character in the occurrence list.
  33.     */
  34.   def wordOccurrences(w: Word): Occurrences = {
  35.     val lowerCaseWord = w.toLowerCase.toList
  36.  
  37.     def occurencesOfCharInString(char: Char): Int = {
  38.       lowerCaseWord.count((charInWord: Char) => char == charInWord)
  39.     }
  40.  
  41.     val charactersForOccurence: Map[Int, List[Char]] = lowerCaseWord.groupBy((char: Char) => occurencesOfCharInString(char))
  42.  
  43.     val occurences: Occurrences = charactersForOccurence.map(tuple => tuple._2.map(char => (char, tuple._1))).flatten.toSet.toList
  44.  
  45.     occurences.sortWith((left, right) => left._1 < right._1)
  46.  
  47.   }
  48.  
  49.   /** Converts a sentence into its character occurrence list. */
  50.   def sentenceOccurrences(s: Sentence): Occurrences = {
  51.     val mergedSentence: String = s.mkString("")
  52.     wordOccurrences(mergedSentence)
  53.   }
  54.  
  55.   /** The `dictionaryByOccurrences` is a `Map` from different occurrences to a sequence of all
  56.     * the words that have that occurrence count.
  57.     * This map serves as an easy way to obtain all the anagrams of a word given its occurrence list.
  58.     *
  59.     * For example, the word "eat" has the following character occurrence list:
  60.     *
  61.     * `List(('a', 1), ('e', 1), ('t', 1))`
  62.     *
  63.     * Incidentally, so do the words "ate" and "tea".
  64.     *
  65.     * This means that the `dictionaryByOccurrences` map will contain an entry:
  66.     *
  67.     * List(('a', 1), ('e', 1), ('t', 1)) -> Seq("ate", "eat", "tea")
  68.     *
  69.     */
  70.   lazy val dictionaryByOccurrences: Map[Occurrences, List[Word]] = dictionary.groupBy(word => wordOccurrences(word))
  71.  
  72.  
  73.   /** Returns all the anagrams of a given word. */
  74.   def wordAnagrams(word: Word): List[Word] = {
  75.     val occurencesForGivenWord: Occurrences = wordOccurrences(word)
  76.  
  77.     dictionaryByOccurrences.get(occurencesForGivenWord).get
  78.   }
  79.  
  80.   /** Returns the list of all subsets of the occurrence list.
  81.     * This includes the occurrence itself, i.e. `List(('k', 1), ('o', 1))`
  82.     * is a subset of `List(('k', 1), ('o', 1))`.
  83.     * It also include the empty subset `List()`.
  84.     *
  85.     * Example: the subsets of the occurrence list `List(('a', 2), ('b', 2))` are:
  86.     *
  87.     * List(
  88.     * List(),
  89.     * List(('a', 1)),
  90.     * List(('a', 2)),
  91.     * List(('b', 1)),
  92.     * List(('a', 1), ('b', 1)),
  93.     * List(('a', 2), ('b', 1)),
  94.     * List(('b', 2)),
  95.     * List(('a', 1), ('b', 2)),
  96.     * List(('a', 2), ('b', 2))
  97.     * )
  98.     *
  99.     * Note that the order of the occurrence list subsets does not matter -- the subsets
  100.     * in the example above could have been displayed in some other order.
  101.     */
  102.   def combinations(occurrences: Occurrences): List[Occurrences] = {
  103.     val result = for {
  104.       o1 <- occurrences
  105.       o2 <- occurrences
  106.       if o1._1 < o2._1
  107.       i1 <- 0 to o1._2
  108.       i2 <- 0 to o2._2
  109.     } yield ((o1._1, i1) ::(o2._1, i2) :: Nil).filter((p: (Char, Int)) => p._2 > 0)
  110.  
  111.     result ::: List(List())
  112.   }
  113.  
  114.  
  115.   /** Subtracts occurrence list `y` from occurrence list `x`.
  116.     *
  117.     * The precondition is that the occurrence list `y` is a subset of
  118.     * the occurrence list `x` -- any character appearing in `y` must
  119.     * appear in `x`, and its frequency in `y` must be smaller or equal
  120.     * than its frequency in `x`.
  121.     *
  122.     * Note: the resulting value is an occurrence - meaning it is sorted
  123.     * and has no zero-entries.
  124.     */
  125.   def subtract(x: Occurrences, y: Occurrences): Occurrences = {
  126.     val mapY = y.toMap
  127.  
  128.     val (toSubstract, ready) = x.partition(tuple => mapY.contains(tuple._1))
  129.  
  130.     (toSubstract.map(t => (t._1, t._2 - mapY(t._1))) ::: ready).filter(p => p._2 > 0).sortWith( (l,r) => l._1 < r._1)
  131.   }
  132.  
  133.   /** Returns a list of all anagram sentences of the given sentence.
  134.     *
  135.     * An anagram of a sentence is formed by taking the occurrences of all the characters of
  136.     * all the words in the sentence, and producing all possible combinations of words with those characters,
  137.     * such that the words have to be from the dictionary.
  138.     *
  139.     * The number of words in the sentence and its anagrams does not have to correspond.
  140.     * For example, the sentence `List("I", "love", "you")` is an anagram of the sentence `List("You", "olive")`.
  141.     *
  142.     * Also, two sentences with the same words but in a different order are considered two different anagrams.
  143.     * For example, sentences `List("You", "olive")` and `List("olive", "you")` are different anagrams of
  144.     * `List("I", "love", "you")`.
  145.     *
  146.     * Here is a full example of a sentence `List("Yes", "man")` and its anagrams for our dictionary:
  147.     *
  148.     * List(
  149.     * List(en, as, my),
  150.     * List(en, my, as),
  151.     * List(man, yes),
  152.     * List(men, say),
  153.     * List(as, en, my),
  154.     * List(as, my, en),
  155.     * List(sane, my),
  156.     * List(Sean, my),
  157.     * List(my, en, as),
  158.     * List(my, as, en),
  159.     * List(my, sane),
  160.     * List(my, Sean),
  161.     * List(say, men),
  162.     * List(yes, man)
  163.     * )
  164.     *
  165.     * The different sentences do not have to be output in the order shown above - any order is fine as long as
  166.     * all the anagrams are there. Every returned word has to exist in the dictionary.
  167.     *
  168.     * Note: in case that the words of the sentence are in the dictionary, then the sentence is the anagram of itself,
  169.     * so it has to be returned in this list.
  170.     *
  171.     * Note: There is only one anagram of an empty sentence.
  172.     */
  173.   def sentenceAnagrams(sentence: Sentence): List[Sentence] = {
  174.     def iter(occ: Occurrences): List[Sentence] = occ match {
  175.       case Nil => List(List())
  176.       case occ =>
  177.         for {
  178.           subOcc <- combinations(occ)
  179.           word <- dictionaryByOccurrences getOrElse (subOcc, List())
  180.           sentence <- iter(subtract(occ, subOcc))
  181.         } yield word :: sentence
  182.     }
  183.     iter(sentenceOccurrences(sentence))
  184.   }
  185.  
  186. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement