Advertisement
gruntfutuk

starwarscrawlfreqcount

Jan 30th, 2018
189
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.47 KB | None | 0 0
  1. # count frequency of words in Star Wars opening text (excluding some common words, defined in a set)
  2. # text from starwars.wikia.com/wiki/Opening_crawl
  3.  
  4. from collections import defaultdict as dd
  5. import string
  6.  
  7. def addeptext(eptext):
  8.     '''append to list of strings the opening crawl text of each Stars Wars episode'''
  9.  
  10.     eptext.append('''Episode I
  11.    THE PHANTOM MENACE
  12.    Turmoil has engulfed the
  13.    Galactic Republic. The taxation
  14.    of trade routes to outlying star
  15.    systems is in dispute.
  16.    
  17.    Hoping to resolve the matter
  18.    with a blockade of deadly
  19.    battleships, the greedy Trade
  20.    Federation has stopped all
  21.    shipping to the small planet
  22.    of Naboo.
  23.    
  24.    While the Congress of the
  25.    Republic endlessly debates
  26.    this alarming chain of events,
  27.    the Supreme Chancellor has
  28.    secretly dispatched two Jedi
  29.    Knights, the guardians of
  30.    peace and justice in the
  31.    galaxy, to settle the conflict....''')
  32.  
  33.     eptext.append('''Episode II
  34.    ATTACK OF THE CLONES
  35.    There is unrest in the Galactic
  36.    Senate. Several thousand solar
  37.    systems have declared their
  38.    intentions to leave the Republic.
  39.    
  40.    This separatist movement,
  41.    under the leadership of the
  42.    mysterious Count Dooku, has
  43.    made it difficult for the limited
  44.    number of Jedi Knights to maintain
  45.    peace and order in the galaxy.
  46.    
  47.    Senator Amidala, the former
  48.    Queen of Naboo, is returning
  49.    to the Galactic Senate to vote
  50.    on the critical issue of creating
  51.    an ARMY OF THE REPUBLIC
  52.    to assist the overwhelmed
  53.    Jedi....''')
  54.  
  55.     eptext.append('''Episode III
  56.    REVENGE OF THE SITH
  57.    War! The Republic is crumbling
  58.    under attacks by the ruthless
  59.    Sith Lord, Count Dooku.
  60.    There are heroes on both sides.
  61.    Evil is everywhere.
  62.    
  63.    In a stunning move, the
  64.    fiendish droid leader, General
  65.    Grievous, has swept into the
  66.    Republic capital and kidnapped
  67.    Chancellor Palpatine, leader of
  68.    the Galactic Senate.
  69.    
  70.    As the Separatist Droid Army
  71.    attempts to flee the besieged
  72.    capital with their valuable
  73.    hostage, two Jedi Knights lead a
  74.    desperate mission to rescue the
  75.    captive Chancellor....''')
  76.  
  77.     eptext.append('''Episode IV
  78.    A NEW HOPE
  79.    It is a period of civil war.
  80.    Rebel spaceships, striking
  81.    from a hidden base, have won
  82.    their first victory against
  83.    the evil Galactic Empire.
  84.    
  85.    During the battle, Rebel
  86.    spies managed to steal secret
  87.    plans to the Empire's
  88.    ultimate weapon, the DEATH
  89.    STAR, an armored space
  90.    station with enough power to
  91.    destroy an entire planet.
  92.    
  93.    Pursued by the Empire's
  94.    sinister agents, Princess
  95.    Leia races home aboard her
  96.    starship, custodian of the
  97.    stolen plans that can save
  98.    her people and restore
  99.    freedom to the galaxy.....''')
  100.  
  101.     eptext.append('''Episode V
  102.    THE EMPIRE STRIKES BACK
  103.    It is a dark time for the
  104.    Rebellion. Although the Death
  105.    Star has been destroyed,
  106.    Imperial troops have driven the
  107.    Rebel forces from their hidden
  108.    base and pursued them across
  109.    the galaxy.
  110.    
  111.    Evading the dreaded Imperial
  112.    Starfleet, a group of freedom
  113.    fighters led by Luke Skywalker
  114.    has established a new secret
  115.    base on the remote ice world
  116.    of Hoth.
  117.    
  118.    The evil lord Darth Vader,
  119.    obsessed with finding young
  120.    Skywalker, has dispatched
  121.    thousands of remote probes into
  122.    the far reaches of space....''')
  123.  
  124.     eptext.append('''Episode VI
  125.    RETURN OF THE JEDI
  126.    Luke Skywalker has returned to
  127.    his home planet of Tatooine in
  128.    an attempt to rescue his
  129.    friend Han Solo from the
  130.    clutches of the vile gangster
  131.    Jabba the Hutt.
  132.    
  133.    Little does Luke know that the
  134.    GALACTIC EMPIRE has secretly
  135.    begun construction on a new
  136.    armored space station even
  137.    more powerful than the first
  138.    dreaded Death Star.
  139.    
  140.    When completed, this ultimate
  141.    weapon will spell certain doom
  142.    for the small band of rebels
  143.    struggling to restore freedom
  144.    to the galaxy...''')
  145.  
  146.     eptext.append('''Episode VII
  147.    THE FORCE AWAKENS
  148.    Luke Skywalker has vanished.
  149.    In his absence, the sinister
  150.    FIRST ORDER has risen from
  151.    the ashes of the Empire
  152.    and will not rest until
  153.    Skywalker, the last Jedi,
  154.    has been destroyed.
  155.    
  156.    With the support of the
  157.    REPUBLIC, General Leia Organa
  158.    leads a brave RESISTANCE.
  159.    She is desperate to find her
  160.    brother Luke and gain his
  161.    help in restoring peace
  162.    and justice to the galaxy.
  163.    
  164.    Leia has sent her most daring
  165.    pilot on a secret mission
  166.    to Jakku, where an old ally
  167.    has discovered a clue to
  168.    Luke's whereabouts....''')
  169.  
  170.     eptext.append('''Episode VIII
  171.    THE LAST JEDI
  172.    The FIRST ORDER reigns.
  173.    Having decimated the peaceful
  174.    Republic, Supreme Leader Snoke
  175.    now deploys the merciless
  176.    legions to seize military
  177.    control of the galaxy.
  178.    
  179.    Only General Leia Organa's
  180.    band of RESISTANCE fighters
  181.    stand against the rising
  182.    tyranny, certain that Jedi
  183.    Master Luke Skywalker will
  184.    return and restore a spark of
  185.    hope to the fight.
  186.    
  187.    But the Resistance has been
  188.    exposed. As the First Order
  189.    speeds toward the rebel base,
  190.    the brave heroes mount a
  191.    desperate escape....''')
  192.  
  193.  
  194. def countwords(epcount, eptext):
  195.     '''generate frequency table in dictionary of occurrences of words in supplied list of strings'''
  196.  
  197.     for episode in eptext:
  198.         for word in episode.split():
  199.             cleanword = word.translate(str.maketrans('','',string.punctuation)).lower()
  200.             epcount[cleanword] += 1
  201.  
  202.  
  203. def outputcount(epcount):
  204.     '''Output frequency table dictionary of word occurrences ordered by value then key'''
  205.  
  206.  
  207.     print('\n\n')
  208.     print("Star Wars films - opening 'crawl' text frequency count.")
  209.     if EXCLUDEWORDS:
  210.         print(f'\t(excluding the words: {", ".join(EXCLUDEWORDS)})\n')
  211.     print(f'{"word":<15} {"count":>4}')
  212.     for word, count in sorted(epcount.items(), key=lambda x: (-x[1],x[0])):
  213.         print(f'{word:<15} {count:>4d}')
  214.  
  215. '''
  216.    main code
  217. '''
  218.  
  219. eptext = []
  220. epcount = dd(int)
  221. EXCLUDEWORDS = frozenset({'the','of', 'to', 'has', 'a'})
  222.  
  223. addeptext(eptext)
  224. countwords(epcount, eptext)
  225.  
  226. # remove count of very boring words
  227. for word in EXCLUDEWORDS:
  228.     if epcount.get(word):
  229.         del epcount[word]
  230.  
  231. outputcount(epcount)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement