SHARE
TWEET

Untitled

a guest Jun 19th, 2016 18 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import copy, itertools, collections, math, string, unicodedata, re, sys
  2.  
  3. # Sentence terminators.  See http://unicode.org/reports/tr29/ at 5.1.1.
  4. unicode_SATerm = [unichr(x) for x in [0x2e,0x2024,0xfe52,0xff0e,33,46,63,1372,1374,1417,1567,1748,1792,1793,1794,2041,2404,2405,4170,4171,4962,4967,4968,5742,5941,5942,6147,6153,6468,6469,6824,6825,6826,6827,7002,7003,7006,7007,7227,7228,7294,7295,8252,8253,8263,8264,8265,11822,12290,42239,42510,42511,42739,42743,43126,43127,43214,43215,43311,43464,43465,43613,43614,43615,43760,43761,44011,65106,65110,65111,65281,65294,65311,65377]]
  5. # my build of Python is narrow so won't take characters 68182,68183,69703,69704,69822,69823,69824,69825,69953,69954,69955,70085,70086
  6.  
  7. def normalise(s):
  8.   if isinstance(s, str):
  9.     s = unicode(s, 'utf8')
  10.   s = unicodedata.normalize('NFKD', s) # worried about ligatures mostly
  11.   s = s.lower().translate({ord(u'\u2019'): ord('\'')})
  12.   for c in s:
  13.     # This is Postelish: if I could forget curly quotes or ellipses or..., what else might I overlook?
  14.     if not(unicodedata.category(c)[0] in ['P','Z']
  15.           or c in u'\t\n\x0b\x0c\r \x85' # other whitespace
  16.           or c in string.ascii_lowercase):
  17.       raise ValueError("bad character \\u%04x" % ord(c))
  18.   return s
  19.  
  20. class WordString:
  21.   """A string broken into its words.
  22.  
  23. The purpose of these objects is to hold the list of words and similar transformations
  24. so they don't need to be recomputed all the time."""
  25.   def __init__(self, s=''):
  26.     self.string = normalise(s)
  27.  
  28.     self.sentences = []
  29.     self.words = []
  30.     sentence = []
  31.     word = ''
  32.     for c in self.string:
  33.       if c in string.ascii_lowercase or c == '\'':
  34.         word = word + c
  35.       else:
  36.         word = word.strip('\'') # extremal apostrophes are single quotes, but internal apostrophes are letters.
  37.         if word != '':
  38.           self.words.append(word)
  39.           sentence.append(word)
  40.           if c in unicode_SATerm and len(sentence) > 0:
  41.             self.sentences.append(sentence)
  42.             sentence = []
  43.         word = ''
  44.     word = word.strip('\'')
  45.     if word != '':
  46.       self.words.append(word)
  47.       sentence.append(word)
  48.     if len(sentence) > 0:
  49.       self.sentences.append(sentence)
  50.  
  51.     self.words_no_apostrophes = [string.join([c for c in w if c != '\''],'') for w in self.words]
  52.  
  53. # These are the base words, taken from Randall's simplewriter.  (Note that some have apostrophes.)
  54. valid_words = "understandings|understanding|conversations|disappearing|informations|grandmothers|grandfathers|questionings|conversation|information|approaching|understands|immediately|positioning|questioning|grandmother|travellings|questioners|recognizing|recognizers|televisions|remembering|rememberers|expressions|discovering|disappeared|interesting|grandfather|straightest|controllers|controlling|considering|remembered|cigarettes|companying|completely|spreadings|considered|continuing|controlled|stationing|controller|straighter|stretching|businesses|somebodies|soldiering|countering|darknesses|situations|directions|disappears|younglings|suggesting|afternoons|breathings|distancing|screenings|schoolings|especially|everything|everywhere|explaining|explainers|expression|branchings|revealings|repeatings|surprising|rememberer|somewheres|television|themselves|recognizer|recognizes|recognized|belongings|finishings|travelling|questioner|beginnings|travelings|questioned|followings|pretending|forgetting|forgetters|forwarding|positioned|travellers|gatherings|perfecting|understand|understood|weightings|approaches|officering|numberings|happenings|mentioning|letterings|husbanding|imaginings|approached|apartments|whispering|interested|discovered|spinnings|clearings|climbings|spendings|clothings|colorings|soundings|truckings|somewhere|troubling|companies|companied|beautiful|computers|confusing|considers|travelers|youngling|continues|continued|traveller|traveling|yellowing|apartment|beginning|wheelings|travelled|sometimes|something|appearing|cornering|believing|countered|believers|countries|soldiered|coverings|creatures|crossings|accepting|daughters|belonging|situation|silvering|different|silencing|touchings|bettering|tomorrows|disappear|thinkings|boardings|discovers|admitting|wrappings|distances|distanced|sightings|shrugging|doctoring|showering|shoulders|shoppings|shootings|dressings|sheetings|shadowing|settlings|servicing|seriously|seconding|searching|weighting|screening|scream
ing|schooling|teachings|bothering|everybody|botherers|bottoming|excepting|expecting|explained|direction|explainer|surprised|surprises|waterings|branching|revealing|returning|surfacing|familiars|repeating|fathering|reminding|supposing|breasting|attacking|remembers|breathing|remaining|breathers|brightest|brownings|suggested|recognize|fightings|attention|figurings|receiving|reasoning|realizing|fingering|buildings|finishing|stupidest|stuffings|questions|watchings|flashings|strongest|strikings|flighting|flowering|promisers|promising|following|bathrooms|prettiest|pretended|stretched|foreheads|foresting|stretches|forgotten|pressings|forgetter|strangest|preparing|forwarded|strangers|possibles|positions|afternoon|straights|pocketing|gardening|pleasings|wondering|gathering|picturing|personals|perfected|stomaches|stomached|carefully|stationed|catchings|parenting|paintings|orderings|groupings|wintering|officered|offerings|centering|numbering|neighbors|certainly|happening|narrowing|narrowest|mountains|mothering|mirroring|middlings|messaging|standings|mentioned|mattering|marriages|histories|machining|hospitals|listening|lightings|springing|lettering|husbanded|spreaders|whispered|imagining|imaginers|spreading|important|languages|answering|cigarette|interests|spiriting|cleanings|knockings|soundest|coatings|sounders|sounding|colleges|coloring|colorful|wouldn't|training|colorers|sorriest|worrying|belonged|approach|tracking|touchers|touching|computer|whatever|toppings|confused|confuses|workings|consider|bettered|teething|tonights|tonguers|tonguing|continue|arriving|tomorrow|controls|together|blacking|blackest|throwers|blocking|throwing|coolings|someones|blockers|somebody|thirties|soldiers|cornered|weighted|counting|thoughts|counters|thinking|thinners|thinning|coursing|covering|thinnest|craziest|snapping|creating|creature|thickest|boarding|crossing|smokings|crowding|smelling|smallest|cuttings|slipping|slightly|dancings|sleepers|sleeping|slamming|wordings|darkness|daughter|boatings|skin
ning|weddings|thanking|sittings|deciding|deciders|singling|singings|despites|simplest|terrible|silvered|tellings|wearings|youngest|watering|silences|teachers|bookings|agreeing|teaching|discover|attacked|bothered|botherer|watching|swingers|bottling|distance|silenced|signings|bottomed|sighting|shutting|shrugged|wondered|swinging|doctored|sweetest|showered|showings|doorways|shouting|shoulder|wronging|shortest|surprise|dragging|shopping|shooters|drawings|actually|shooting|dreaming|dressing|avoiding|shitting|shirting|shipping|drinking|drinkers|braining|sheeting|sharpest|drivings|sharpers|dropping|droppers|shadowed|surfaced|settling|washings|settings|services|serviced|earliest|backings|earthing|servings|branches|branched|seconded|seatings|surfaces|searched|searches|walkings|screened|waitings|screamed|supposed|emptiest|emptying|breaking|breakers|schooled|enjoying|enjoyers|entering|runnings|breasted|rounders|rounding|supposes|everyone|visitors|visiting|breathed|excepted|roofings|exciting|breathes|expected|rollings|bankings|breather|explains|villages|bridging|viewings|brighter|ringings|righting|suitings|bringing|revealed|bringers|returned|failings|repliers|replying|repeated|brothers|familiar|wintered|families|suggests|farthest|furthest|browning|fathered|removing|building|reminded|bathroom|allowing|suddenly|remember|allowers|feedings|builders|burnings|feelings|remained|refusing|stupider|windings|although|stuffing|studying|business|angriest|fighting|fighters|students|figuring|received|twenties|receives|fillings|reasoned|findings|stronger|turnings|realizes|realized|readiest|fingered|readying|striking|trusters|finishes|trusting|finished|readings|reachers|reaching|quieters|quietest|quieting|fittings|quickest|writings|beaching|question|trucking|callings|stranger|flashing|beatings|answered|flattest|flatting|flighted|straight|troubled|flowered|pullings|storming|promiser|couldn't|promised|promises|followed|stoppers|problems|probably|prettier|stopping|pretends|stomachs|troubles|presse
rs|tripping|forehead|stickers|forested|pressing|whispers|carrying|sticking|carriers|stepping|stealers|forwards|stealing|becoming|prepares|prepared|powering|freeings|stations|possible|position|freshest|beddings|wrapping|fronting|catching|fuckings|policing|funniest|pointers|pointing|catchers|pocketed|gardened|starters|ceilings|pleasing|gathered|starting|centered|platings|plastics|planning|pictured|pictures|traveler|pickings|personal|glancing|yourself|chancing|perfects|changing|peopling|partying|partings|parented|grabbing|grabbers|changers|checking|starring|bedrooms|checkers|pairings|standing|painting|outsides|greatest|cheeking|greening|greenest|grouping|ordering|anything|openings|guarding|wheeling|officers|guessing|spreader|offering|children|anywhere|numbered|choicest|noticers|noticing|hallways|nothings|hangings|nobodies|admitted|neighbor|choosing|choosers|happened|neckings|happiest|narrowed|narrower|spotting|churches|mouthing|traveled|mountain|mothered|accepted|mornings|mirrored|headings|spirited|hearings|heatings|circling|middling|messaged|messages|heaviest|spinners|mentions|helpings|cleanest|memories|meetings|meanings|appeared|mattered|marrieds|marrying|marriage|yellowed|markings|cleaning|managing|cleaners|holdings|machined|machines|lunching|luckiest|lowering|longings|clearest|hospital|lockings|littlest|clearing|listened|housings|lightest|lighting|lighters|spinning|hundreds|hurrying|believes|spenders|believed|climbing|husbands|lettered|lettings|learning|leadings|ignoring|laughing|ignorers|imagines|yellower|imagined|climbers|imaginer|spending|closings|specials|speakers|language|believer|clothing|clouding|speaking|interest|spacings|landings|knowings|southest|jacketed|knocking|kitchens|kissings|killings|keepings|dresses|biggest|sticker|careful|shirted|warmers|shipped|birding|drinker|carries|sheeted|warming|carried|carrier|driving|sharper|tonight|drivers|casings|sharers|sharing|stepped|dropped|dropper|whisper|shapers|shaping|shakers|shaking|tonguer|shadows|stealer|seve
ral|tongued|staying|settles|settled|dusting|setting|tongues|catting|backing|catches|earlier|warmest|earthed|service|serving|warring|wanters|catcher|serious|eastest|sensing|senders|easiest|sending|sellers|selling|seeming|seeings|tiniest|seconds|station|causing|seating|edgings|stating|timings|efforts|starter|causers|screens|blacker|ceiling|screams|centers|wanting|walling|walkers|certain|emptied|empties|emptier|thrower|endings|started|schools|scarers|scaring|sayings|engines|savings|sanding|enjoyed|starers|saddest|enjoyer|staring|enoughs|rushing|bagging|runners|entered|running|chances|entires|chancer|rubbing|rowings|rounder|chanced|rounded|starred|rooming|changed|changes|blocked|angrier|exactly|changer|blocker|excepts|checked|excited|walking|excites|roofing|through|expects|blooded|checker|cheeked|throats|explain|wakings|springs|thought|waiting|blowing|rolling|rocking|risings|ringing|baggers|animals|righter|righted|ridings|richest|facings|reveals|blowers|choicer|choices|returns|voicing|worries|resting|chooses|failing|spreads|replier|failers|falling|spotted|replies|replied|chooser|thinned|fallers|thinner|balling|boarded|repeats|visitor|farther|further|circles|another|removed|fastest|removes|fathers|thicker|circled|visited|reminds|fearing|spirits|classes|answers|banking|boating|cleaned|feeding|spinner|thanked|village|worried|feeling|cleaner|remains|cleared|refuses|refused|workers|reddest|telling|yellows|spender|working|clearer|clearly|climbed|tearing|fighter|teaming|figured|figures|booking|viewing|climber|usually|closest|receive|filling|teacher|reasons|closing|finally|closers|anybody|finding|anymore|realize|special|finders|booting|realest|clothed|readier|readies|readied|fingers|teaches|tallest|clothes|speaker|readers|talkers|clouded|talking|reading|firings|spacing|takings|reacher|reached|coating|reaches|raising|raining|fishing|quietly|fittest|fitting|systems|whether|bothers|wrapped|fitters|quieted|quieter|quickly|coffees|quicker|fixings|coldest|sounded|sounder|actings|anyw
ays|college|flashed|flashes|bottles|flatter|flatted|colored|bottled|wording|turning|sorting|flights|colorer|putting|pushers|pushing|flowers|pullers|swinger|wonders|sorrier|pulling|proving|comings|bottoms|promise|truster|boxings|company|follows|younger|trusted|sweeter|yelling|problem|without|beached|footing|confuse|beaches|brained|bearing|pretend|trucked|forcing|presser|wishing|trouble|forests|appears|beating|airings|forever|surface|control|forgets|accepts|pressed|wronged|winters|forming|presses|prepare|beaters|breaker|wheeled|because|forward|coolers|cooling|allowed|powered|pourers|freeing|pouring|tripped|coolest|breasts|someone|fresher|suppose|somehow|friends|breaths|copping|fronted|becomes|porches|poppers|popping|poorest|treeing|fucking|fullest|pooling|breathe|polices|funnier|funnies|policed|bedding|corners|futures|pointer|pointed|gamings|counted|soldier|pockets|wetting|pleased|gardens|wetters|wettest|pleases|counter|sunning|players|westest|country|gathers|bridges|playing|plating|bridged|plastic|couples|softest|getting|planned|getters|placing|gifting|pinking|pilings|piecing|picture|coursed|courses|summers|picking|snowing|phoning|bedroom|glances|glanced|winging|snapped|glassed|glasses|perhaps|covered|crazies|crazier|perfect|peopled|persons|peoples|suiting|pausing|passing|goldest|partied|windows|parties|parting|creates|grabbed|smokers|created|grabber|brought|weights|bringer|arrives|crosser|crosses|grasses|parents|palming|graying|pairing|crossed|painted|arrived|greying|smoking|paining|outside|brother|greater|smilers|outings|greened|greener|crowded|travels|smiling|ordered|grounds|offings|smelled|openers|browner|grouped|opening|smaller|growing|okaying|officer|guarded|slowest|slowing|cupping|slipped|guessed|guesses|cutting|offices|gunning|offered|browned|allower|nursing|numbing|suggest|cutters|numbers|sliders|halving|sliding|noticer|wedding|notices|noticed|nothing|writers|hallway|handing|sleeper|normals|noising|hanging|nodding|dancing|wearing|writing|slammed|hangers|dark
est|skinned|happens|trained|needing|builder|beliefs|happier|necking|nearest|hardest|nearing|burning|believe|winding|hatting|narrows|stupids|sitting|mouthed|deadest|watered|sisters|mothers|singled|winning|morning|mooning|moments|heading|missing|decides|decided|decider|mirrors|minutes|hearing|minings|already|minding|middled|heating|burners|singles|middles|deepest|stuffed|heaters|singing|simpler|heavier|heavies|belongs|message|despite|mention|simples|studies|studied|silvers|helping|helpers|members|meeting|willing|meanest|attacks|herself|meaning|dinners|student|hidings|matters|marries|married|busying|busiest|silence|against|highest|wildest|hilling|marking|mapping|manages|managed|himself|history|tracked|strikes|manning|hitting|makings|hitters|whiting|towards|watched|holding|toucher|machine|holders|lunches|lunched|watches|luckier|stretch|streets|lowered|loudest|lookers|looking|longing|calling|longest|locking|bending|washing|signing|hottest|littler|benders|strange|sighted|listens|linings|likings|housing|beneath|sighing|sicking|however|lighted|sickest|lighter|calming|lifters|hundred|calmest|hurried|hurries|lifting|touched|doesn't|hurting|touches|showers|husband|doctors|letters|cameras|letting|tossing|leaving|learned|dogging|leaning|leafing|leaders|leading|whitest|layered|ignored|showing|ignores|stories|ignorer|shoving|laughed|lasting|largest|imaging|doorway|besting|imagine|shouted|stormed|downing|storing|topping|avoided|dragged|shorter|betters|stopper|landers|insides|instead|written|drawing|shopped|stopped|between|landing|shooter|knowing|jackets|dreamed|carding|toothed|knocked|knifing|kitchen|joining|teethed|stomach|joiners|kissing|kindest|killers|killing|shoeing|kidding|jumping|kickers|kicking|jumpers|keepers|dressed|keeping|enough|checks|kicked|jumper|kicker|kidded|jumped|killed|joking|killer|kinder|joiner|kisses|kissed|joined|knives|knifes|knifed|jacket|knocks|itself|ladies|landed|lander|inside|larger|images|lasted|imaged|laughs|ignore|aboves|laying|accept|layers|across|
yellow|leaded|leader|leaved|leaned|learns|leaves|yelled|lesser|letter|living|lifted|lifter|humans|hugest|lights|wrongs|houses|liking|likers|lining|housed|acting|listen|hotels|little|hotter|locals|locked|horses|longer|longed|looked|hoping|looker|losing|adding|louder|loving|lovers|lowing|lowest|writer|lowers|homing|holing|holder|making|hitter|makers|manned|manage|writes|admits|mapped|marked|hilled|higher|afraid|hiding|hidden|matter|ageing|helper|member|helped|memory|hellos|heater|metals|middle|heated|mights|minded|hearts|mining|minute|headed|mirror|misses|missed|moment|moneys|monies|months|mooned|mostly|having|mother|worlds|hating|mouths|moving|movers|movies|musics|worker|myself|naming|namers|narrow|hatted|hardly|nearer|neared|nearly|harder|necked|needed|happen|hanger|newest|nicest|nights|worked|nobody|nodded|handed|noises|noised|worded|normal|norths|nosing|agrees|noting|notice|halves|halved|number|guying|numbed|nurses|nursed|agreed|wooden|offing|gunned|offers|office|guards|wonder|okayed|okay'd|ok'ing|oldest|womens|opened|opener|groups|womans|within|ground|orders|others|outing|wished|greens|greats|owning|wishes|owners|paging|pained|paints|greyed|greyer|paired|palest|grayed|palmed|papers|grayer|parent|parted|passed|golder|passes|pauses|paused|paying|person|people|wipers|goings|glance|phones|phoned|photos|picked|giving|givens|pieces|pieced|piling|gifted|pinked|pinker|places|placed|getter|gotten|plated|plates|gently|played|gather|player|please|gating|garden|pocket|gamers|points|pointy|gaming|future|wiping|fuller|police|pooled|poorer|fucked|popped|popper|fronts|friend|freers|poured|pourer|freest|powers|formed|forget|forgot|forest|forces|forced|footed|pretty|follow|fliers|flyers|proven|airing|proves|proved|prover|pulled|flying|puller|flower|pushes|pushed|floors|pusher|flight|fixers|fixing|quicks|winter|fitted|quiets|fitter|winged|radios|rained|raises|raised|fishes|rather|fished|firsts|firing|reader|finish|finger|fining|finest|realer|finder|really|finals|reason|filled|figur
e|fought|fights|fields|fewest|redder|refuse|remain|feeing|remind|feared|father|faster|remove|repeat|family|faller|fallen|failer|failed|rested|fading|return|reveal|riches|richer|riding|ridden|window|riders|rights|facing|allows|ringed|rising|rivers|extras|rocked|rolled|expect|roofed|excite|except|rooves|roomed|events|rounds|rowing|evened|rubbed|almost|entire|runner|enters|keying|rushed|rushes|sadder|safest|sanded|enjoys|saving|engine|savers|winded|saying|enders|scared|scares|scarer|scenes|ending|school|scream|either|eights|screen|egging|effort|search|edging|seated|second|eaters|seeing|seemed|eating|seller|sender|senses|sensed|easier|easily|earths|serves|served|willed|dusted|settle|during|driers|sevens|sexing|shadow|shakes|shaken|dryers|shaker|always|shaped|driest|shapes|shaper|drying|shares|shared|sharer|sharps|driver|drives|driven|sheets|droves|drinks|shirts|drunks|shoots|dreams|shorts|dozens|should|downed|shouts|shoved|shoves|showed|wilder|shower|dogged|doctor|shrugs|sicker|sicked|didn't|siding|sighed|doings|sights|signed|dinner|silent|silver|dyings|widest|simple|simply|deeper|single|decide|deaths|sister|deader|sizing|darker|wholes|sleeps|dances|danced|slides|slider|cutter|slower|slowed|slowly|smalls|cupped|smells|smelly|crying|smiles|smiled|smiler|crowds|smokes|smoked|smoker|create|covers|snowed|whited|softer|course|softly|couple|counts|corner|whiter|copped|cooled|cooler|coming|whites|sorted|colors|colder|sounds|coffee|coated|spaces|clouds|spaced|spoken|speaks|clothe|closed|closes|closer|spends|climbs|clears|cleans|spirit|cities|circle|church|choose|spread|chosen|choice|chests|sprung|spring|sprang|stages|stairs|cheeks|stands|keeper|change|chance|stared|stares|starer|chairs|starts|center|causer|caused|states|stated|causes|caught|catted|stayed|steals|stolen|casing|sticks|caring|carded|stones|animal|cannot|stored|stores|storms|answer|camera|calmer|calmed|called|street|buyers|bought|strike|struck|buying|anyone|strong|busier|busied|busing|burner|stuffs|burned|stupid|bui
lds|browns|suites|suited|brings|summer|bright|sunned|bridge|breath|breast|breaks|broken|surest|branch|brains|anyway|boxing|wheels|sweets|swings|bottom|bottle|system|bother|tables|taking|takers|talked|talker|boring|taller|booted|taught|booked|teamed|teared|boning|appear|bodies|thanks|boated|thicks|boards|bluest|things|thinks|blower|thirds|thirty|though|threes|throat|bloods|thrown|throws|blocks|timing|blacks|timers|tinier|biters|tiring|todays|biting|toning|tongue|arming|birded|bigger|wetter|toothy|beyond|better|topped|tossed|bested|tosses|beside|bender|toward|bended|tracks|belong|trains|belief|travel|behind|begins|before|bedded|became|become|beater|beaten|trucks|truest|aren't|trusts|truths|trying|turned|twenty|around|uncles|weight|wasn't|arrive|unless|upping|wedded|viewed|barely|visits|banked|balled|voices|voiced|waited|bagger|waking|walked|bagged|walker|walled|asking|wanted|wanter|warred|waring|backed|warmed|warmer|babies|washed|washes|avoids|attack|waters|asleep|watery|waving|wavers|seems|party|minds|eaten|sells|sends|known|sense|hours|pasts|paths|easts|pause|mined|layer|payed|serve|earth|early|wills|aired|heard|hears|dusts|kills|goers|hotel|seven|dried|ideas|sexed|sexes|going|drier|dries|dryer|glass|heads|shake|leads|shook|aging|gives|phone|local|photo|shape|picks|above|locks|money|drops|share|given|wrong|girls|month|sharp|piece|wilds|sheet|drove|drive|moons|lands|piles|ships|drink|piled|drank|drunk|shirt|pinks|shits|dress|shoes|mores|shoot|longs|shots|dream|drawn|draws|drags|shops|haves|horse|short|gifts|dozen|place|downs|shout|hopes|shove|hoped|plans|wiper|doors|shown|shows|wiped|plate|world|mouth|doers|joins|shrug|shuts|leafs|moved|plays|moves|sicks|pleas|sided|sides|sighs|don't|gated|sight|looks|gates|wives|mover|signs|doing|dirts|knees|movie|learn|gamer|games|gamed|dying|music|since|desks|sings|singe|deeps|point|acted|musts|yells|funny|death|wider|loses|sixes|whose|names|sizes|sized|skins|keyed|skies|pools|slams|darks|named|slept|namer|sleep|leave|dance|slide|
hated|young|whole|fucks|slips|who's|slows|front|porch|loved|hates|small|fresh|cries|cried|smell|white|nears|loves|smile|freer|pours|lover|freed|power|smoke|frees|yeses|crowd|cross|jokes|fours|snaps|crazy|forms|cover|homed|snows|among|necks|happy|least|press|force|homes|count|needs|wipes|years|cools|foots|joked|foods|never|songs|comes|sorry|flier|color|sorts|souls|lower|newer|flyer|colds|sound|flown|south|works|coats|space|nicer|prove|lucky|spoke|night|speak|cloud|hurts|yards|pulls|holed|flies|close|climb|spent|spend|words|holes|hangs|clear|lunch|spins|clean|class|liars|floor|holds|spots|alive|noise|flats|chose|flash|nones|child|fixer|fixed|fixes|chest|cheek|mains|stage|hands|makes|stair|quick|stood|check|fiver|stand|stars|fives|north|wrote|stare|lying|quiet|noses|quite|start|chair|nosed|radio|lived|rains|notes|state|large|cause|raise|catch|noted|maker|stays|halls|angry|stole|steal|reach|first|cased|cases|steps|lives|fires|stuck|carry|stick|cares|still|cared|fired|cards|added|stone|reads|halve|stops|write|ready|hairy|store|hairs|can't|storm|numbs|story|could|finer|knife|fines|calms|fined|calls|hurry|while|buyer|finds|nurse|found|which|lifts|admit|final|fills|lasts|keeps|where|buses|bused|study|offed|stuff|fight|woods|burnt|burns|field|human|build|built|wings|offer|brown|allow|guyed|suite|suits|bring|marks|fewer|feels|hills|wines|later|feeds|agree|guess|surer|fears|broke|break|guard|brain|highs|often|marry|ahead|knock|boxes|sweet|boxed|okays|swing|swung|falls|reply|hides|fails|huger|table|takes|taken|laugh|taker|rests|house|talks|bored|women|faded|fades|wheel|facts|wraps|boots|teach|faces|teams|older|books|tears|bones|maybe|woman|faced|areas|boned|opens|tells|rides|grows|thank|their|boats|thens|there|these|thick|rider|after|board|right|bluer|thins|blues|blued|grown|thing|again|rings|think|blows|blown|third|would|means|those|risen|three|rises|blood|eying|heres|throw|block|threw|roses|group|river|black|tying|times|timed|roads|rocks|order|timer|meant|green|tired|tires|ex
tra|meets|today|rolls|biter|bitey|other|toned|tones|light|bites|worry|birds|roofs|armed|outer|rooms|outed|every|tooth|teeth|round|image|bests|event|liked|evens|rowed|likes|touch|bends|windy|bents|towns|winds|great|below|track|overs|owned|liker|train|enter|wound|begun|helps|began|begin|owner|beers|kinds|wests|paged|trees|treed|tripe|trips|pages|alone|hello|beats|enjoy|bears|truck|beach|safer|trues|truer|trued|safes|hells|sames|trust|truth|pains|wells|sands|tried|tries|greys|turns|isn't|heavy|twice|saves|uncle|saved|under|kicks|saver|paint|lines|grays|until|weeks|upped|pairs|using|asked|usual|scare|being|ender|metal|views|paled|banks|visit|pales|paler|voice|scene|heats|waits|balls|ended|empty|woken|palms|wakes|waked|walks|lined|knows|pants|worse|paper|walls|worst|wants|eight|heart|along|backs|egged|jumps|warms|grass|might|edges|grabs|seats|avoid|parts|edged|aunts|watch|about|eater|water|won't|waved|waves|goods|waver|golds|wears|ears|grab|fits|each|sets|knee|lots|part|dust|noes|fish|stay|good|rain|cats|work|wild|laid|hang|gold|pass|step|loud|case|help|your|past|nods|home|care|path|hell|read|love|fire|gods|lift|card|stop|pays|keys|cars|paid|idea|fine|none|real|into|drop|heat|wish|cans|kids|find|goer|goes|went|calm|just|lead|gone|call|fill|nose|ship|huge|acts|lows|buys|some|note|kind|shit|shat|mind|ices|busy|pick|hand|shod|shoe|gave|reds|shot|hall|fews|ours|feel|burn|drew|such|draw|shop|give|felt|wing|suit|drag|hear|feed|mine|girl|feds|iced|down|when|fees|half|suns|able|word|fear|nows|door|fast|sure|leaf|pile|jobs|show|wine|boys|dogs|yell|hair|guys|kept|doer|fall|fell|head|shut|gift|hole|rest|numb|kick|lean|take|both|sick|fail|fade|took|miss|side|sigh|held|talk|last|plan|bore|hold|done|tall|teas|fact|boot|like|wife|rich|sign|book|wood|team|does|main|offs|tear|tore|torn|rode|dirt|gets|bone|joke|ride|make|told|play|died|tell|dies|tens|area|body|than|boat|line|guns|desk|that|what|kiss|them|they|gate|sang|then|plea|kill|face|sing|sung|eyes|thin|blue|deep|made|rung|ring|sirs|
wide|rang|moon|blow|eyed|sits|more|whys|dead|blew|days|this|left|grew|he's|size|rise|rose|whom|have|skin|most|late|grow|slam|road|game|tied|ties|arms|time|dark|rock|okay|ages|mens|roll|mans|tiny|slid|dads|airs|ok'd|tire|wets|roof|slip|full|cuts|pool|slow|tone|bite|lips|cups|bits|room|olds|poor|bird|adds|ever|knew|hate|fuck|pops|even|tops|wipe|hits|once|west|hour|rows|rubs|toss|best|ones|only|from|runs|bend|bent|onto|open|move|town|free|pour|legs|rush|jump|snap|many|hill|less|maps|snow|keep|safe|much|soft|join|beer|i'll|beds|four|tree|same|sand|form|cops|must|year|cool|trip|lets|beat|mark|born|bear|with|come|save|know|true|sons|lock|song|soon|laws|came|outs|name|well|been|says|said|sort|feet|soul|high|yeah|were|hide|foot|turn|cold|wind|yard|twos|coat|food|over|hats|owns|ends|lady|aged|arts|else|long|flew|hurt|page|week|upon|lays|used|uses|hard|eggs|wins|very|mays|seas|pain|near|view|bars|weds|pull|edge|wrap|lies|bank|spin|ball|grey|seat|spun|lied|neck|push|wait|hope|bags|city|look|wake|spot|saws|woke|wear|pink|liar|eats|walk|need|sees|seen|puts|seem|wall|want|pair|gray|sell|will|flat|back|pale|sold|asks|wars|land|send|mean|warm|baby|sent|also|wash|away|here|easy|hung|sens|star|hers|aunt|palm|worn|life|meet|wore|east|live|news|five|wave|next|lost|lose|nice|ways|far|few|war|bad|bag|bar|wed|use|ups|art|was|two|try|are|bed|top|arm|wet|big|too|bit|tie|the|ten|tvs|tea|box|boy|sun|bus|but|buy|any|can|car|cat|and|son|cop|sos|cry|cup|cut|who|dad|sky|day|six|why|sit|sat|sir|die|did|dog|she|dry|sex|set|ear|ate|eat|see|saw|win|won|sea|egg|end|say|sad|ran|run|rub|row|eye|rid|ask|fed|fee|red|way|fit|fix|all|put|fly|for|pop|fun|get|got|god|pay|own|out|our|air|ors|one|old|ohs|gun|key|off|guy|now|not|nor|nod|nos|ago|new|hat|age|had|has|her|met|hey|may|hid|map|him|add|his|man|men|hit|mad|low|lot|hot|lip|how|lit|lie|kid|i'm|let|leg|i'd|ice|led|act|lay|law|ins|yes|yet|you|its|job|no|at|by|my|on|ha|do|ok|he|oh|is|tv|me|us|as|hi|go|if|of|am|up|to|we|so|in|or|it|be|an|i|a".split('|')
  55. # Randall's simplewriter handles apostrophes in far too careless a way.
  56. # I'm trying to be more careful, but have I overlooked something?
  57. # I allow every word to take "'s"; this is sloppy.
  58. valid_words_apostrophe = valid_words + [word + "'s" for word in valid_words]
  59. # Here are all the extra contractions in Wiktionary that the simplewriter accepts,
  60. # plus "weren't" which Randall tells me it was meant to (p.c.)
  61. valid_words_apostrophe += "can't|could've|couldn't've|he'd|he'd've|he'll|he's|he'sn't|how'd|how'll|how's|i'd|i'd've|i'll|i'm|i've|i'ven't|it'd|it'd've|it'll|it's|it'sn't|let's|might've|must've|not've|she'd|she'd've|she'll|she's|she'sn't|should've|somebody'd|somebody'd've|somebody'dn't've|somebody'll|somebody's|someone'd|someone'd've|someone'll|someone's|something'd|something'd've|something'll|something's|that'll|that's|there'd|there'd've|there're|there's|they'd|they'dn't|they'dn't've|they'd've|they'd'ven't|they'll|they'lln't've|they'll'ven't|they're|they've|they'ven't|'tis|'twas|we'd|we'd've|we'dn't've|we'll|we'lln't've|we're|we've|what'll|what're|what's|what've|when's|where'd|where's|where've|who'd|who'd've|who'll|who're|who's|who've|why'll|why're|why's|won't|won't've|would've|wouldn't've|you'd|you'd've|you'll|you're|you'ren't|you've|you'ven't|weren't".split('|')
  62.  
  63. # The stress patterns information was derived from the CMU Pronouncing Dictionary,
  64. # with the first list at http://www.ranks.nl/stopwords (and a list of modal verbs)
  65. # used to indicate which monosyllables can lack stress altogether ('u').
  66. word_stresses_list = [set(s.split(' ')) for s in "1010|1010|1010|1010|0110|100|100|100|1010|1010|010|101|01000|0100|100|100|10 100|100|1010|1010|1010|010 0100|0100|010|0100|101|1010|100|10|010|010|0100|010|101|1010|010|10|010|0100|01|100|010|10|10|100|100 101|100|100|100|1010|010|101|10|010|101|10|100|10|10|010 0100|101|101|010|010|010|10|010|010|010|0100|10|1010|01|1010|1010|101|010|100|10 100|100|010|10 100|10|100|010|010|010|100|010|100|100|010|101|101|10|010|1000|100|10 100|100|100|100|0100|01|010|100|1010|010|10|10|10|10|10|100|10|10|10|10 100|101|101|100|010|010|010|10 100|10|010|010|100|10 100|100|010|010|10|10|10 01|10|010|100|010|10|010|10|10|100|10|10|010|10|010|1010|100|10 100|100|10|100|010|101|10|10|010|010|10|100|10|10|10|100|100|10|10|10|10|10|100|10 100|100|1000|100|10|10|10|10|10|10|100|1010|100|100|010|010|01|010|010|01|010|100|10|010|010|100|010|010|100|010|010|10|010|010|10|010|10|10|10|010|101|10|010|100|010|100|1010|100|10|100|100|10|10|10|10|10|10|10|100|100|100|100|10|100|010|1|10|100|10|010|10|010|10|010|100|10|100|010|101|1|100|10 100|10|100|100|100|100|010|10|10|100|10|10|100|10|100|10|100|100|10 100|100|100|10|100|10 100|100|100|10|100|100|10 100|100|10|10|100|100|10 100|010|100|10 100|10|10|100|100|10|10|0100|0100|10|010|100|100|101|10 100|100|10|10|10|10|10|10|100|100|100|10|10|100|100|100|01|01|10|10|10|010|010|10|01|010|10|010|10|10|01|10|10|010|010|010|01|010|10|10|10|10|10|10|10|10|100 101|10|10|10|10|10|1|10|10|10|10|10|10 100|10|100|10|010|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|010|010|10 100|10|01|10|100|10|10|10|10|100|100|10|10|010|10|010|01|10|100|10|10|10 100|10|10|10|10|10|10|1|10|10|10|10|10|10|10|10|10|10|10|01|10|10|10|10|10 100 1000|10|10|10|010|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10 
100|10|10|100|10|100|10|10|10|10|1|100|10|100|1|10|10|1|10|1|01|100|100|10|10|1|010|010|100|10|10|10|10|010|101|100|100|1|010|10|010|1|010|10|10|10|01|100|10|10|10|10|10|10|10|01|10|01|10|010|010|010|10|010|10|10 100|01|10|10|10|10|010|10|010|10|010|100|010|010|10|10|10|10|01|010|100|10|01|10|100|10|100|10|10|10|100|01|10|01|10|10|10|10|10|1010|101|100|10|100|10|10|100|10|10|10|10|10|100|100|100|10|10|10|10|10|10|10|10|10|10|10|10|10|10|1|10|10|10|10|100|10|10|100|10|10|10|101|100|10|01|10|10|10|10|10|10|100|10|10|100|10|100|10|10|10|10|010|01|01|100|10|10|100|010|10|10|10|10|10|10|010|100|10|10|10|100|10|10|10|10|10|10|10|10|10|10|10|10|10 100|10|100|10|10 01|10|10 01|10|100|100|10|010|10|10|10|10|10|10|10|10|10|10|11|10|10|10|10|10|100|101|100|10|10|100|10|10|10 100|10|101|10|10|100|100|10|10|10|100 101|010|10|10|10|10|10|100|10|100|10|10|10|10|10|10|010|10|10|10|100|10|10|10 100|10 100|10|100|100|10|10|10|10|100|10|10|01|10|10|100|10|10|10|10|100|10|10|01|01|10|100|100|10|10|100|10|100|10|10|10|10|10|10|10|10|100|01|10|01|10|10|10|10|10|10|010|10|010|010|100|010|10|0100|10|10|10|10|10|010|10|10|10|101|10|10|10|10|100|10|10|10|10|10|10|10|10|10|10|10|1|10|10|10|10|10|10|100|10|10|01|10|10|10|10|1|1|10|10|10|10|10|10|10|10|10|100|1|10|10|10|10|10|1|10|10|10|100|10|1|10|10|10|10|10|100|10|10|10|100|10|10|10|10|10|100|10|10|10|10|10|10|10|10|10|10|1|10|10|1|10|10|10|10|10|10|10|100|10|10|10|1|10|10|10|10|10|10|01|10|10|010|10|01|10|10|10|10|10|10|010|10|10|10|10|1|10|1|10|1|10|1|100|010|10|10|01|1|010|10|01|10|u|01|10|10|1|1|01|10|1 01|1|10|10|10|10|10|10|10|100|10|10|10|10|10|01|10|10|10|01|10|10|10|10|10|1|010|10|10|10|01|01|10|1|10|10|10|10|01|100|10|10|10|010|01|10|01|10|10|10|100|01|10|10|10|10|10|10|1|10|10|1|10|10|10|10|01|1|010|01|10|10|10|10|10|10|10|10|1|10|10|10|10|10|10|10|10|100 1000|10|01|10|10|10|10|100|10|1000|10|101|101|10|10|10|10|1|100|10|10|10|10|10|1|10|10|10|10|10|10|10 
100|10|10|10|1|10|10|10|10|10|100|10|10|10|10|10|1|10|100|100|10|10|10|10|10|10|10|10|101|10|1|10|10|10|10|10|10|10|10|10|1|100|10|10|10|10|10|10|10|100|10|10|10|10|10|10|10|101|10|10|10|10|10|10|01|1|10|01|10|1|10|01|1|10|10|10|10|10|01|10|10|010|10|01|01|01|1|1|10|10|10|01|10|10|1|00 01|10|10|10|01|10|10|10|10|1|10|1|10|10|01|10|1|1|10|10|01|10|10|10|10|10|10|10|10|1|010|100|10|01|10|10|10|10|10|10|10|10|10|10|1|10|10|10|10|10|10|10|10|10|10|10|10|10|1|10|10|10|10|1|10|10|10|10|10|10|10|1|10|10|10|10|10|10|10|1|10|1|1|10|01|10|10|100|10 01|10|10|10|10|10|10|10|10|10|10|10|01|1|10|010|10|1|1|10|01|10|10|10|10|10|10|10|1|10|01|10|10|10|11|10|10|10|10|1|10|10|10|10|10|1|10|1|100|10|1|100|10|10|010|100|10|10|10|10|1|1|10|10|100|10|10|1|010|10|10|01|10|10|10|10|10|100|10|100|10|10|10|10|10|10|10|10|10|10|10|10|10|1|10|10|1|10|1|10|10|01|100|10|10|10|10|10|01|10|10|10|10|10|1|10|10|10|10|10|10|10|10|10|10|10|01|010|010|10|10|10|10|010|10|10|10|10|10|10|10|1|10|10|10 100|100|10|01|10|01|10|10|10|10|10|10|10|10|10|10|10|01|01|10|10|10|10|10|10|10|100|100|10|01|10|10|10|10|10|100|10|01|10 100|1|1|10|10|10|10|10|1 01|1|10|10|01|10|10|1|10|100|1|1|10|10|10|10|10|10|10|10|10|10|10|10|10 100|10|1|10|10|10|10|10|01|10|10|010|10|10|10|10|10|10|10|10|10|10|1|10|10|10|10|10|10|10|10 100|10|10|10|1 10|10|10|10|10|10|10|10|01|10|01|10|010|10|1|10|10|100|10|10|010|10|1|10|10|10|010|1|10|10|10|10|10 01|01|10|10|1|1|01|10|10|10|10|1|10|1|1|10|10|10|1|10|10|10|10|10|10|10|10|10|10|10|10|10|1|10|01|1|1|10|10|10|1|1|10|10|10|10|10|1|1|1|1|1|10|1|01|10|10|10|10 01|10|100|10|10|1|01|01|10|01|10|01|10|10|10|1|1|1|1|1|10|10|10|10|10|10|10|1|1|10|10|10|10|1|10|10|01|10|10|10|1|10|10|1|1|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|1|10|1|01|1|1|1|10|01|10|10|10|10|10|10|1|100|01|10|10|10|10|1|10|1|10|10 01|10|10|10|1|10|10|10|1|1|10|10|10|1|10|1|10|10|10|10|10|01|10|10|10|10|10|10|1|10|10|1|10|10|10|10|10|1|1|100 
101|10|10|10|1|10|10|1|10|01|10|10|1|1|10|10|1|10|1|01|10|10|1|10|10|1|10|01|01|110|10|10|10|100|1|10|01|1|10|10|10|1|1|1|10|10|10|10|1|1|1|10|1|10|1|1|10|10|10|10|1|10|10|10|1|10|10|10|10|10|1|1|1|10|1|10|10|10|1|10|10|1|10|10|1|10|10|10|1|10|1|10|10|1|10|10|10|10|1|10|10|10|10|10|01|1|10|1|1|10|1|1|10|1|10|10|10|1|01|01|10|10|1|10|10|10|10|10|10|10|1|1|10|1|10|10|10|10|1|1|10|1|10|10|1|10|10|10|10|1|101|1|10|1|10|10|1|1|10 100|10|10|10|10|10|10|10|10|10|10|1|10|1|1|1|10|10|10 01|01|10|01|1|10|10|01|01|10 100|10|10|10|1|10|10|01|01|10|10|10|10|10|10|1|10|01|1|10|10|10|1|1|01|1|01|01|1|1|01|1|10|10|1|10|010|10|10|10|1|10|10|10|10|01|10|10|10|10|10|10|1|1|10|1|10|1|1|10|1|1|10|10|1|10|10|10|10|10|1|10|10|10|10|1|100|100|1|1|1|1|10|10|10|10|10|10|10|1|10|10|10|10|1|10|1|10|10|1|1|10|1|10|1|10|1|1|1|1|1|1|1|1|10|u|1|1|1|1|1|10|10|1|10|1|10|1|10 u|10|1|10|1|1|10|10|10|10|10|10|10|10|10|01|1|10|10|10|10|1|1|10|1|1|10|10|10|1|10|1|1|1|10|10|1|1|10|1|1|1|10|01|10|1|10|10|1|10|10|1|10|10|1|1|10|10|1|10|10|10|1|10|10|10|1|1|10|1|1|1|10|10|1|1|1|1|10|10|10|1|1|1|10|1|1|1|1 01|1|10|1|1|1|10|1|1|1|1|10|1|1|10|10|1|1|10|10|1|10|1|1|10|10|1|10|10|1|100|10 01|1|1|1|10|10 100|10|1|1|1|10|1|1|1|10|100 101|1|100|10|10|10|1|1|10|1|1|1|10|1|10|1|1|1|1|1|1|10|10|1|1|101|10|1|1|1|10|10|10|10|10|10|10|1|10|10|10|10|1|1|1|1|10|01|10|1|10|1|1|10|1|1|10|1|10|1|1|1|1|1|1|1|10|1|10|100|10|10|10 01|10|10|1|10|10|10|10|10|01|10|1|1|10|10|01|10|1 01|10|1|01|1|01|10|01|01|01|10|01|01|10|10|1|10|10 u|1|1|1 10|1|10|01|10|1|10|01|01|10|10|1|10|10|1|1|10|1|10|10|10|1|1|10|1|10|10|10|1|10|1|1|10|10|1|10|01|01|10|01|100|10|10|1|10|1|10|1|1|1|1|1 
10|1|1|1|1|1|10|1|1|1|10|1|1|1|1|1|1|10|01|10|1|010|1|10|10|10|1|10|1|1|1|1|1|10|1|1|10|10|1|1|01|1|10|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|1|1|10|1|1|u|10|1|1|1|1|10|1|10|1|1|10|1|10|1|1|10|10|1|1|1|1|1|1|10|1|1|10|1|10|10|10|1|1|10|1|1|1|1|1|1|1|1|1|10|1|1|1|1|10|1|1|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|10|1|10|1|1|10|1|1|1|1|1|10|1|10|1|1|01|1|10|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|10|10|10|1|1|10|10|10|1|1|1|1|1|1|1|10|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|1|01|1|1|1|1|1|1|10|1|10|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|10|10|10|1|1|1|1|101|1|1|1|1|1|1|1|1|10|10|1|1|10|1|1|1|1|1|10|1|1|1 10|1|10|1|1|1|1|10|1|10|1|1|1|1|1|10|10|1|1|u|1|1|10|u|10|1|1|1|1|1|10|u|10|1|1|1|u|1|01|10|1|1|1|u|10|1|10|1|1|1|1|1|1|1|10|1|1|1|10|1|01|1|1|1|1|1|10|1|1|1|10|1|01|1|10|1|1|1|1|1|1|10|10|01|1|10|1|1|01|1|1|1|01|1|1|10|10|1|10|1|10|1|1|1|1|10|10|1|1|1|1|1|1|10|1|10|1|1|1|10|10|1|100|1|10|1|1|1|1|u|1|1|u|u|1|10|10|1|1|10|1|1|1|1|1|01|1|1|1|1|1|u|1|u|10|1|10|1|10|1|1|1|1|10|1|10|1|10|1|1|1|1|10|10|1|1|10|10|10|1|01|1|10|10|10|1|1|1|1|10|1|1|1|10|1|10|10 100|1|1|1|10|1|01|1|10|1|1|1|1|10|1|1|1|1|01|1|10|1|10|1|10|1|10 01|1|01|01|10|1|1|1|1|1|1|1|1|10|01|01|1|01|1|1|1|10|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|10 00|10|1|1|10|1|10|1|10|1|1|1|01|1|1|1|10|1|100|1|10|10|10|1|1|1|10|1|10|1|1|1|1|1|10|10|10|1|1|1|1|1|1|1|1|10|1|1|1|1|1|01|1|1|1|1|1|u|10|1|1|01|1|1|1|1|01|10|10|u|1|1|1|10|1|1|1|1|1|u|1|1|1|1|1|1|1|1|1 0|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|1|1 10|1|1|1|1|1|1|1|1|010|1|1|1|10 00 01|1|1|1|1|1|1|10|1|1|1|1 0|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|10|10|1|1|1|1|1|1|1|1|1|10 
u|1|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|u|u|1|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|100|10|u|1|1|1|1|u|u|1|u|u|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|u|1|1|u|1|1|1|u|u|1|u|1|1|1|1|1|1|1|1|1|1|1|01|10|1|1|1|10|1|1|1|11|10|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|10|1|1|1|u|1|1 10|1|1|1|1|1|10|u|1|1|1|10|10|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|1|1|1|u|1|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|1|1|10|1|1|1|10|1 10|1|1|1|1|1|1|1|01|1|1|10|1|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|1|1|1|10|1|10|1|01|u|10|1|1|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|1|1|1|1|1|1|u|1|1|u|1|1|1|1|1|u|1|1|0|1|11|1|1|1|1|1|0|1|10|u|1|1|0|1|1|101|1|1|1|u|1|1|1|1|u|1|1|1|1|u|1|u|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|u|1|1|u|1|1|1|1|1|1|u|u|10 u|1|1|1|1|1|1|1|u|1|1|u|0|1|1|01|1|1|1|u|u|u|1|1|u|1|1|u|1|u|1|1|1|1|1|1|1|1|u|1|1|1|u|1|1|u|1|1|1|1|1|1 111|1|1|u|u|1|u|u|u|u|u|1|u|11|u|1|u|11 1010|u|1 11|u|1|1|u|u|11 u|u|u|u|u|u|0|u|u|0|u|0|10100|1010|10100|1010|01100|1000|1000|1000|1010|1010|010|1010|01000|0100|100|100|100 1000|1000|1010|10100|10100|010 0100|01000|0100|0100|101|1010|100|10|0100|010|0100|010|1010|1010|010|100|010|0100|01|100|010|10|10|1000|1010 1000|100|100|1000|10100|0100|1010|100|010|1010|100|100|100|100|010 0100|101|101|010|0100|010|100|0100|0100|010|0100|100|1010|010|1010|10100|101|0100|1000|10 100|100|0100|100 1000|10|1000|010|010|0100|100|010|1000|1000|010|101|101|100|0100|1000|1000|100 1000|100|1000|100|01000|01|0100|100|1010|010|100|100|100|100|100|1000|100|100|10|10 100|101|101|100|0100|010|0100|1000|10|0100|010|100|10 100|100|010|010|100|10|010 100|10|010|100|010|10|0100|100|10|1000|100|100|010|100|010|1010|100|10 100|100|100|100|0100|101|100|100|0100|010|100|1000|10|100|10|100|100|100|100|100|100|100|100|100 
1000|100|1000|100|10|10|10|10|10|100|100|1010|1000|100|010|010|01|010|010|01|0100|1000|10|010|010|100|0100|010|100|010|010|10|010|0100|10|010|100|10|100|010|1010|100|010|1000|010|100|1010|100|100|100|100|100|100|100|100|10|100|10|100|1000|100|100|100|100|010|1|100|100|100|010|100|010|10|010|100|100|1000|0100|101|10|100|10 100|100|100|100|100|1000|010|100|10|100|10|100|100|100|1000|100|100|100|100 1000|100|100|100|100|10 100|100|100|100|100|100|100 1000|100|100|10|100|1000|100 1000|010|1000|10 100|100|10|100|100|100|10|0100|01000|10|010|1000|100|101|100 1000|100|100|100|10|100|100|10|1000|100|100|10|10|1000|100|100|01|010|10|100|10|010|010|100|01|0100|100|010|10|10|010|100|10|010|010|010|100|010|10|10|100|10|10|100|100|100|100|100|100|10|10|10|10|100|10|100|10|10|10 100|10|100|10|010|10|10|10|10|100|10|10|10|100|10|10|100|100|10|10|100|100|10|100|10|100|10|100|010|0100|10 100|100|010|10|100|10|100|100|10|100|1000|100|100|010|10|010|01|10|100|10|100|10 100|100|10|100|10|10|10|1|10|10|10|10|10|100|100|10|10|10|10|010|10|10|100|100|10 100 1000|10|10|10|010|10|10|10|10|100|10|10|10|100|100|10|100|10|10|10 100|100|100|1000|10|100|100|10|100|100|1|100|100|1000|1|100|100|1|100|1|01|100|100|10|100|1|010|0100|100|100|10|100|10|0100|101|1000|100|1|010|100|010|10|010|100|100|10|010|1000|10|100|10|100|10|100|10|01|100|01|100|0100|010|010|100|010|10|1000|010|10|10|10|10|010|10|010|10|010|100|010|0100|100|100|100|100|01|010|100|100|01|10|100|100|100|10|100|100|100|01|100|010|100|10|100|10|100|10100|101|100|10|100|10|100|1000|10|10|100|100|10|1000|100|100|100|10|100|10|10|10|100|10|10|100|10|10|10|10|1|10|10|100|10|100|10|10|1000|10|100|100|101|100|10|010|100|100|100|10|10|100|100|10|100|100|10|1000|10|100|100|10|010|010|01|100|100|100|100|010|10|100|10|10|10|100|010|100|100|10|100|100|10|100|100|10|10|10|10|100|100|10|10|100|100|100|100|10|10 01|10|010 100|10|100|100|100|010|10|100|100|10|10|100|100|100|10|10|110|10|10|10|10|10|100|101|1000|10|10|1000|10|10|10 
100|10|101|10|10|1000|100|100|100|100|1010 1000|010|10|10|100|10|100|100|10|100|10|100|10|10|10|10|010|100|10|100|100|100|100|10 100|10 100|10|1000|100|100|100|100|10|1010|100|100|01|10|100|100|100|10|100|10|100|100|100|01|010|10|100|100|100|10|100|100|100|10|10|100|10|10|100|10|100|100|010|100|01|10|100|10|100|10|100|010|10|0100|0100|100|010|100|0100|10|100|100|100|100|010|10|10|10|101|100|100|100|10|100|10|100|100|100|100|100|10|10|10|10|100|1|10|10|100|10|10|10|100|10|10|01|100|100|100|10|1|1|10|10|100|10|100|10|10|100|10|100|1|10|100|10|10|10|10|10|10|100|100|10|1|100|10|10|100|10|1000|10|10|100|100|10|100|10|10|100|100|100|10|10|10|100|10|100|100|10|100|10|10|10|10|100|10|10|100|10|10|100|100|10|100|10|10|100|10|100|100|10 100|10|01|100|10|010|10|010|10|10|100|10|10|100|0100|10|10|100|10|1|10|1|10|1|100|1|100|010|10|10|010|1|010|10|010|10|1|010|10|10|1|10|01|100|010 10|1|10|10|10|10|100|10|100|1000|10|10|100|10|100|010|100|10|100|010|10|100|10|100|10|10|010|100|10|10|010|01|10|1|100|10|10|10|010|100|10|10|100|010|01|10|010|100|10|10|100|010|10|100|100|100|10|10|1|10|10|1|100|10|10|10|010|1|0100|01|100|10|10|100|10|10|10|10|1|10|10|10|10|100|10|10|10|100 1000|10|01|10|10|100|10|100|100|1000|10|101|1010|10|100|10|10|1|100|100|10|100|100|10|10|10|100|100|10|10|10|100 1000|10|100|10|1|10|100|10|10|10|100|10|10|100|10|100|1|100|100|100|10|100|10|100|10|10|10|100|1010|100|1|100|100|10|10|10|10|10|10|10|10|100|10|100|10|100|100|10|100|100|10|10|100|100|100|10|100|100|100|10|10|10|10|10|01|1|10|010|100|1|10|01|1|10|10|10|10|100|010|10|100|010|100|01|010|010|1|1|100|10|100|01|100|10|1|010 000|10|100|10|01|10|100|10|10|1|10|10|10|10|010|10|10|10|10|10|010|100|100|10|10|10|10|10|10|1|0100|100|100|01|10|100|100|10|10|100|10|10|100|10|1|100|100|10|100|10|10|100|10|10|100|100|10|10|1|10|100|10|10|1|100|10|10|10|100|10|10|1|100|100|10|10|10|10|100|1|10|1|1|100|010|10|100|100|10 
01|10|100|100|10|10|10|10|10|100|100|10|010|1|100|010|10|1|10|10|010|10|100|100|100|10|10|10|1|10|01|10|10|10|11|10|10|100|100|1|10|10|100|10|10|10|100|1|1000|10|1|100|10|10|010|100|10|10|10|10|1|1|100|10|1000|10|10|1|010|10|10|01|100|100|100|10|10|100|10|1000|10|10|100|10|10|10|100|10|10|10|10|10|10|1|100|10|1|100|1|10|10|010|100|10|10|10|10|10|01|10|10|100|100|10|1|10|10|100|100|10|10|10|10|100|10|10|010|010|010|100|100|10|100|010|10|10|10|100|100|100|10|1|100|10|10 100|100|100|010|100|01|10|100|100|10|100|10|100|100|10|10|10|010|01|10|100|10|100|100|100|10|100|100|100|01|10|10|10|10|10|1000|10|01|10 100|1|10|10|10|100|100|10|010 10|1|10|10|01|100|100|1|100|100|10|10|10|10|100|10|10|10|10|10|10|10|10|10|10 100|100|10|10|100|100|100|10|01|10|10|010|10|10|10|10|100|10|10|10|100|10|1|10|10|100|100|10|100|100|100 1000|10|10|10|1 10|10|10|10|100|10|10|10|01|10|010|100|010|10|1|10|10|100|10|10|010|10|1|10|10|10|010|1|10|100|10|100|010 100|01|10|10|1|1|01|10|10|10|100|1|10|1|1|10|10|10|1|10|100|10|10|100|10|10|10|10|100|10|100|100|1|10|01|10|1|10|10|10|1|1|10|10|10|10|100|1|1|10|10|1|10|10|01|100|10|10|10 01|10|1000|10|10|10|01|010|10|01|100|010|10|10|10|1|1|10|10|1|10|10|10|10|10|100|10|10|10|100|10|100|10|1|10|10|010|10|10|100|1|100|10|1|1|10|10|10|10|10|10|100|10|10|10|100|10|10|10|10|10|100|1|100|10|010|1|1|1|10|01|10|10|10|10|10|10|1|100|010|10|100|10|10|10|10|10|10|10|10|10|100|1|10|100|100|10|1|10|10|10|10|10|10|10|100|100|100|10|01|10|100|10|10|10|10|1|10|10|1|10|10|10|10|10|10|1|100 101|10|10|100|1|10|10|10|10|010|10|100|10|1|10|10|1|100|1|01|10|10|1|100|100|10|10|01|01|110|10|100|10|100|10|100|01|1|100|100|10|1|10|10|10|100|100|10|1|10|1|10|1|10|1|1|100|10|10|10|1|10|100|100|1|10|10|10|100|100|10|10|1|100|1|10|100|100|1|10|10|1|10|100|1|10|10|10|10|10|1|10|10|10|10|10|10|100|10|10|10|10|10|10|010|1|10|1|1|10|10|1|100|1|10|10|100|1|01|01|10|100|1|10|10|10|100|100|10|10|10|1|10|1|10|10|10|100|1|10|10|1|100|10|10|10|10|100|10|1|1010|1|100|1|100|10|1|10|10 
100|10|100|10|10|10|10|10|10|100|10|1|10|1|10|10|10|10|010 100|01|10|01|1|10|10|01|01|10 100|10|10|10|1|10|10|01|01|100|10|10|10|10|100|10|10|010|1|10|100|100|1|1|01|1|01|01|10|1|010|10|10|10|1|10|010|10|100|10|1|100|10|10|10|010|10|10|100|10|10|100|1|10|10|10|10|1|1|10|10|1|10|10|10|10|10|10|100|10|1|10|10|10|100|1|100|100|10|10|1|1|10|10|10|100|100|10|10|10|10|100|10|100|1|10|10|10|10|10|1|10|10|10|10|10|10|10|10|10|10|10|10|10|100|1|1|10|1|10|1|10|10|1|10|10|10|1|1 10|10|1|100|10|1|10|10|10|100|10|10|10|10|10|01|10|10|10|10|10|10|10|100|1|10|10|10|10|1|10|10|1|10|10|10|10|1|10|10|10|1|10|01|100|1|10|10|10|10|10|10|10|10|1|1|10|10|10|10|100|10|10|10|10|100|10|1|10|10|1|1|100|10|10|10|10|10|10|100|10|10|10|1|10|10|10|1|1|1|100|10|10|10|10|10|10|1|10|10|10|10|10|10|1|10|10|100|1|10|1|10|10|10|10|10|10|10|100|10 01|1|10|10|10|10|10|1|1|1|100|1|1|1|10|101|1|100|10|10|10|10|1|10|10|10|10|10|10|10|1|1|10|1|1|10|10|10|10|10|101|10|10|10|10|10|10|10|10|100|10|100|1|10|10|10|10|1|1|1|1|10|01|100|10|10|10|10|10|10|10|10|10|10|1|10|1|10|1|10|10|10|10|100|100|100|10|010 100|10|10|1|10|10|10|10|10|01|10|1|1|10|100|01|10|1 01|10|10|01|10|01|10|01|010|01|10|01|01|10|10|10|10|1 10|10|10|1 10|1|10|01|100|1|10|01|010|10|10|1|10|100|1|1|100|1|10|10|10|1|1|10|1|10|10|10|1|10|1|1|10|100|1|100|010|01|100|01|100|10|100|10|10|10|10|10|10|1|10|10|10|10|10|10|1|10|1|1|1|10|10|1|1|10|10|10|100|01|10|1|0100|1|100|10|10|10|10|10|10|1|10|1|10|10|1|10|10|1|10|01|10|10|10|1|10|1|10|1|1|10|10|1|1|1|10|10|10|10|1|1|1|1|1|10|1|10|10|100|1|10|10|1|1|10|10|10|10|10|1|10|10|10|10|1|10|1|1|10|10|10|1|10|1|1|1|1|100|10|1|10|10|1|10|10|10|10|10|10|10|1|10|1|10|1 
10|10|10|10|10|10|10|10|1|10|10|1|10|10|10|10|10|10|10|1|10|10|10|10|1|10|100|100|10|10|100|1|10|1|10|10|10|10|1|1|10|1|1|10|1|10|1|1|10|10|10|10|1|10|1|10|1|10|10|1|1|1|10|10|1|10|10|10|1|10|1|10|100|1|10|10|10|10|10|10|10|1|10|01|10|10|1|10|10|10|1|10|10|10|10|10|1|10|10|10|10|10|10|10|10|10|10|10|10|10|1|1|1|10|10|10|10|1|10|1|1|1|1|10|10|10|1|10|10|1|1|1|10|10|10|1|10|10|1|10|100|1|10|10|01|10|10|10|10|10|1|10|1|100|1|1|10|10|10|10|1|1|1|1|10|1|10|10|1|1|1|10|10|100|1|1|1|1|101|1|10|10|1|10|10|10|10|10|10|10|10|10|1|1|10|1|1|100|10|10|10 100|1|10|1|10|1|1|10|10|10|1|10|1|10|1|10|10|1|10|1|1|10|10|1|10|1|10|10|1|10|10|1|10|10|10|1|10|10|01|10|10|10|10|u|100|1|10|1|1|1|10|1|10|1|10|1|1|10|10|1|01|1|1|10|1|10|10|10|10|10|10|10|01|10|10|10|1|1|1|1|10|10|10|01|1|100|1|1|010|1|1|10|01|10|10|10|10|10|10|1|10|10|10|10|1|10|10|10|1|10|10|10|10|100|10|10|10|10|10|10|10|1|1000|1|100|10|10|10|1|1|10|10|u|10|1|10|10|1|1|10|10|10|1|1|1|01|10|1|10|1|1|1|10|10|10|1|100|1|10|10|1|1|1|100|1|10|1|10|10|1|10|10|10|10|1|1|10|100|10|10|10 01|10|10|10|10|1|10|1|10|10|10|10|1|10|10|10|10 100|1|1|1|100|10|01|1|100|1|10|10|10|10|10|10|10|1|01|1|100|1|10|1|10|1|10 01|10|01|01|10|10|10|10|1|10|1|1|10|100|01|01|10|01|10|1|10|10|10|10|1|10|10|10|1|1|10|10|10|1|10|10|10|10 00|10|10|10|10|1|10|10|10|1|10|10|01|10|1|10|10|1|100|1|10|10|10|10|1|10|10|10|10|10|1|10|10|10|10|10|10|10|10|1|10|1|10|10|10|10|10|1|10|1|1|01|10|1|10|10|10|1|100|10|10|01|10|1|10|10|01|10|10|1|1|10|10|10|10|10|10|1|10|10|10|1|10|1|1|10|10|1|1|1|10|1|1|1|1|1|10|1|1|10|1|1|1|10|1|1|1|1|1|1|10|10|1|1|1|10|10|10|1|010|1|1|1|10 00 01|1|1|10|10|10|1|10|10|1|1|1|1|1|1|1|10|1|10|10|10|10|1|1|1|1|1|1|100|10|1|1|1|1|1|10|1|1|10|10 
100|1|1|1|10|1|1|1|1|1|1|1|1|1|1|1|10|1|1|u|10|1|10|10|1|1|10|1|1|1|1|1|10|1|1|10|10|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|10|1|1|1|1|10|1|10|1|1|1|1|1|10|1|1|1|1|1|1|1|1|10|10|100|10|1|1|1|10|1|u|u|10|1|1|1|1|1|1|1|10|1|1|10|1|1|1|1|1|1|10|1|1|1|1|1|10|1|10|1|1|10|10|1|1|10|10|10|10|1|1|1|1|1|1|1|1|1|1|10|10|1|1|1|01|100|10|1|10|10|1|10|10|11|10|10|1|1|1|10|1|1|1|1|10|10|10|1|10|1|1|10|10|1|1|1|10|10|10|1|10|10|1|10|10|10|10|1|10|10|1|10|1|1|10|10|1|1|1|1|10|10|1|1|10|1|10|10|1|1|1|10|1|1|1|1|10|1|1|1|1|1|10|1|1|1|1|10|1|1|1|1|1|1|1|1|1|10|1|1|1|10|1|10|1|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|10|10|10|10|10|1 10|10|10|1|1|1|10|1|01|10|1|100|1|10|10|10|10|10|1|1|1|10|10|1|10|1|10|1|1|1|1|1|1|1|1|10|1|1|10|10|1|1|1|10|1|1|1|10|10|1|1|10|1|10|1|1|1|1|1|1|1|1|1|1|1|10|10|1|1|1|1|10|1|10|10|01|u|10|1|10|1|10|1|1|1|1|1|1|1|1|10|1|1|1|1|10|10|10|1|1|1|1|1|1|1|10|10|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|110|1|10|1|1|10|1|1|10|1|1|1|1|1|1|1010|1|1|1|u|1|1|1|10|u|1|1|1|1|1|1|u|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|1|1|1|1|1|1 10|1|10|1|1|10|1|1|1|1|1|1|1|1|10|01|1|1|10|1|10|1|1|1|1|1|1|1|1|10|1|1|1|1|1|1|1|1|u|1|1|1|1|u|1|1|10|1|1|1|1|10 1110|10|1|1|10|1|1|1|1|1|1|1|1|11|u|1|10|11|1|10 110|10|1|1|1|1|1 11|1|1|1|1|1 0|1|u|1|1|1|1|u|10|100|u|10|u|u|10|1|1|u|u|10|u|u|u|10|10|100|1 10|u|10|u|10|10|10|u|10|u|u|10|10|100 101|1010 1000|10000 10100|100 101|100|10|100|100|10|10|100|100|10|10|u|1|10|10|u|u|10|100|10|100|u|100|100|u|u|10|1|1|u|10|100|u|100|u|u|10|10|u|10|u|1|u|10|1|10|1|10|u|0|1|10|u|u|10|10|100|u|10|u|u|10|u|10|10 u".split('|')]
# Map each word to the set of stress patterns it can take, by pairing
# valid_words_apostrophe with word_stresses_list position by position.
# NOTE(review): valid_words_apostrophe is defined earlier in the file; presumably
# it is the word list in the same order as the pattern table -- confirm.
# Because this is a defaultdict(set), looking up an unknown word yields an
# empty set rather than raising KeyError.
word_stresses = collections.defaultdict(set, zip(valid_words_apostrophe, word_stresses_list))
# iambic_line_badness scores any line whose last word is one of these as float('inf').
always_stressless = set("the|a|an|and|or|but|nor".split('|'))
  69.  
  70. # Iambic pentameter allows some variations.  
  71. # Score these with penalty points, so a flawless line scores 0 and more deviation incurs more penalties.
  72. # Too much crazy heuristicism went into this.
  73. iambic_res = collections.defaultdict(list)
  74. for (s0,f0,l0) in [(0,"[u0][1u]",2), (1,"[u0]0",2), (1,"1[u0]",2), (2,"11",2), (2,"[u0][u0]1",3)]:
  75.   for (s1,f1,l1) in [(0,"[u0][1u]",2), (1,"[u0]0",2), (4,"1[u0]",2), (2,"11",2), (2,"[u0][u0]1",3)]:
  76.     if f0=="[u0]0" and f1=="1[u0]": continue # no midword weak before an inversion
  77.     for (s2,f2,l2) in [(0,"[u0][1u]",2), (1,"[u0]0",2), (2,"1[u0]",2), (2,"11",2), (2,"[u0][u0]1",3)]:
  78.       if f1=="[u0]0" and f2=="1[u0]": continue
  79.       for (s3,f3,l3) in [(0,"[u0][1u]",2), (1,"[u0]0",2), (2,"1[u0]",2), (2,"11",2), (2,"[u0][u0]1",3)]:
  80.         if f2=="[u0]0" and f3=="1[u0]": continue
  81.         for (s4,f4,l4) in [(0,"[u0]1",2), (1,"[u0][u0]",2), (2,"11",2), (4,"1u",2), (2,"[u0][u0]1",3)]:
  82.           for (s5,f5,l5) in [(0,"",0), (1,"0",1), (2,"u",1)]:
  83.             iambic_res[l0+l1+l2+l3+l4+l5].append((s0+s1+s2+s3+s4+s5, re.compile("^"+f0+f1+f2+f3+f4+f5+"$")))
  84. for l in iambic_res:
  85.   iambic_res[l].sort()
  86.  
# Returned by iambic_line_badness when even the shortest stressing of a
# candidate line has more than 16 syllables.  It is a finite value, so callers
# can tell it apart from float('inf'), which marks a line that simply does not scan.
TOO_LONG = 1e308 # sentinel
  88.  
  89. def iambic_line_badness(words):
  90.   stressings = reduce(lambda S,w: set([s+t for s in S for t in word_stresses[w]]),
  91.           words, set(['']))
  92.   if stressings == set([]):
  93.     raise Exception("stress dictionary miss: " + string.join(words))
  94.   if min(len(s) for s in stressings) > 16: # stop in this case; there are no lines so long
  95.     return TOO_LONG
  96.   if words[-1] in always_stressless:
  97.     return float('inf')
  98.   n = float('inf')
  99.   for s in stressings:
  100.     for (score, re) in iambic_res[len(s)]:
  101.       if re.match(s):
  102.         if score < n:
  103.           n = score
  104.   return n
  105.  
# This threshold is totally heuristic.
# It was calibrated against 2000 words from Randall's relativity piece
# and 2000 words of more-or-less iambic pentameter in general modern English (not subject to the word list).
# As it stands, the average number of lines findable in the relativity piece is 4,
# and the IMO more regular poems in my list have average badness under 1.8.
# iambic_score accepts a run of k lines when its total badness is at most k times this value.
permissible_error_per_line = 2.0
  112.  
  113. # A passage is in iambic pentameter if its total badness score is low enough relative to its length.
  114. def iambic_score(ws):
  115.   """Return the greatest number of consecutive lines of iambic pentameter to be found in ws."""
  116.   def assign_if_less(d, k, v):
  117.     if k not in d or d[k] > v:
  118.       d[k] = v
  119.  
  120.   n = 0
  121.  
  122.   # Evaluate subsequences of lines, in one pass.
  123.   # best_ending_at stores the best sequences seen so far.
  124.   # The outer index is position (after how many words?); the inner key is the number of lines of pentameter;
  125.   # the value is the lowest penalty score possible there.
  126.   best_ending_at = [{0: 0}]
  127.   for i in range(len(ws.words)):
  128.     best_here = {0: 0}
  129.     for j in range(i,-1,-1):
  130.       badness = iambic_line_badness(ws.words[j:i+1])
  131.       if badness == TOO_LONG: # stop checking for lines ending here once they're too long to be
  132.         break
  133.       if badness == float("inf"):
  134.         continue
  135.       for (length, accumulated_badness) in best_ending_at[j].items():
  136.         assign_if_less(best_here, length + 1, accumulated_badness + badness)
  137.         if (accumulated_badness + badness) <= permissible_error_per_line * (length + 1):
  138.           if length + 1 > n:
  139.             n = length + 1
  140.     best_ending_at.append(best_here)
  141.  
  142.   return n
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top