Guest User

Untitled

a guest
Jul 18th, 2018
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.42 KB | None | 0 0
  1. ## Tests [plain_text]
  2. [XX] - 'Brian Is In The Kitchen'
  3. [->] - 'Brian Is in the Kitchen'
  4.  
  5. [OK] - 'Bring It On'
  6.  
  7. [OK] - 'You Can Leave Your Hat On'
  8.  
  9. [OK] - 'One Is For'
  10.  
  11. [XX] - 'And You And I'
  12. [->] - 'And You and I'
  13.  
  14. [XX] - 'Love Is In The Air'
  15. [->] - 'Love Is in the Air'
  16.  
  17. [XX] - 'I Am The Walrus'
  18. [->] - 'I Am the Walrus'
  19.  
  20. [XX] - 'That Was Then , This Is Now'
  21. [->] - 'That Was Then, This Is Now'
  22.  
  23. [OK] - 'You Are So Beautiful'
  24.  
  25. [OK] - 'This Is As Good As It Gets'
  26.  
  27. [XX] - 'The Man Who Sold The World'
  28. [->] - 'The Man Who Sold the World'
  29.  
  30. [OK] - 'In a Safe Place'
  31.  
  32. [XX] - 'The Best Of The Temptation'
  33. [->] - 'The Best of The Temptation'
  34.  
  35. [XX] - 'Rattle And Hum'
  36. [->] - 'Rattle and Hum'
  37.  
  38. [XX] - 'It 's Now Or Never'
  39. [->] - 'It's Now or Never'
  40.  
  41. [OK] - 'Nothin' But a Good Time'
  42.  
  43. [OK] - 'Life Is But a Dream'
  44.  
  45. [XX] - 'Ain't But a Few Of Us Left'
  46. [->] - 'Ain't But a Few of Us Left'
  47.  
  48. [XX] - 'You Are But a Draft , a Long Rehearsal For a Show That Will Never Play'
  49. [->] - 'You Are But a Draft, a Long Rehearsal for a Show That Will Never Play'
  50.  
  51. [XX] - 'I Know You Are But What Am I'
  52. [->] - 'I Know You Are but What Am I'
  53.  
  54. [XX] - 'I don't Know What It Is But I Like It'
  55. [->] - 'I Don't Know What It Is but I Like It'
  56.  
  57. [XX] - 'Live At Woodstock'
  58. [->] - 'Live at Woodstock'
  59.  
  60. [XX] - 'Face To Face'
  61. [->] - 'Face to Face'
  62.  
  63. [XX] - 'Death Cab For Cutie'
  64. [->] - 'Death Cab for Cutie'
  65.  
  66. [XX] - 'Pretty In Pink'
  67. [->] - 'Pretty in Pink'
  68.  
  69. [OK] - 'Spy vs. Spy'
  70.  
  71. [OK] - 'Birds v. Worms'
  72.  
  73. [OK] - 'Time After Time etc.'
  74.  
  75. [XX] - 'Keep On rockin' In The Free World'
  76. [->] - 'Keep On Rockin' in the Free World'
  77.  
  78. [XX] - 'Come In From The Cold'
  79. [->] - 'Come In From the Cold'
  80.  
  81. [XX] - 'Nowhere To Run'
  82. [->] - 'Nowhere to Run'
  83.  
  84. [XX] - 'How To Dismantle An Atomic Bomb'
  85. [->] - 'How to Dismantle an Atomic Bomb'
  86.  
  87. [XX] - 'Song I Love To Sing'
  88. [->] - 'Song I Love to Sing'
  89.  
  90. [XX] - 'Reality Used To Be a Friend Of Mine'
  91. [->] - 'Reality Used to Be a Friend of Mine'
  92.  
  93. [XX] - 'An Ode To Death'
  94. [->] - 'An Ode to Death'
  95.  
  96. [OK] - 'Otis! The Definitive Otis Redding'
  97.  
  98. [XX] - 'In Time: The Best Of R . E . M .'
  99. [->] - 'In Time: The Best of R.E.M.'
  100.  
  101. [XX] - 'I'm Just a Singer (In a Rock 'n' Roll Band) '
  102. [->] - 'I'm Just a Singer (In a Rock 'n' Roll Band)'
  103.  
  104. [XX] - 'I'm Just; A Singer (Otis! Rox) '
  105. [->] - 'I'm Just; A Singer (Otis! Rox)'
  106.  
  107. [XX] - 'The Go - Gos'
  108. [->] - 'The Go-Gos'
  109.  
  110. [XX] - 'At The Drive - In'
  111. [->] - 'At the Drive-In'
  112.  
  113. [XX] - 'The Boy With The x - Ray Eyes'
  114. [->] - 'The Boy With the X-Ray Eyes'
  115.  
  116. [XX] - 'R . E . M'
  117. [->] - 'R.E.M'
  118.  
  119. [XX] - 'N . W . A'
  120. [->] - 'N.W.A'
  121.  
  122. [XX] - 'R . O . C . K In The U . S . A'
  123. [->] - 'R.O.C.K in the U.S.A'
  124.  
  125. [OK] - 'Rock 'n' Roll'
  126.  
  127. [XX] - 'Will o' The Wisp'
  128. [->] - 'Will o' the Wisp'
  129.  
  130. [OK] - 'Sweet Child o' Mine'
  131.  
  132. [XX] - 'Nick Cave And The Bad Seeds'
  133. [->] - 'Nick Cave and The Bad Seeds'
  134.  
  135. [XX] - 'Elvis Costello And The Attractions'
  136. [->] - 'Elvis Costello and The Attractions'
  137.  
  138. [XX] - 'Huey Lewis And The News'
  139. [->] - 'Huey Lewis and The News'
  140.  
  141. ## Script avec tests [python]
  142. #!/usr/bin/env python
  143. #encoding: utf-8
  144.  
  145. from MontyTagger import MontyTagger
  146. import re
  147.  
# Part-of-speech tagger shared by all calls; created once when the module loads.
tagger = MontyTagger()
# Presumably "no space" characters (written with no space on either side).
# NOTE(review): no function visible in this file reads this list.
ns_list = ['—']
# "No space before": re-joined as the character followed by one space.
nsb_list = ['?', '!', ':', ';']
# "Space before, not after": opening delimiters, re-joined with a leading space.
sbna_list = ['(', '“']
# "Space after, not before": closing delimiters, re-joined with a trailing space.
sanb_list = [')', '”']
# Splits a title around sentence punctuation; the capturing group keeps each
# delimiter as its own element of the split result.  It also matches the dash
# from ns_list, which is not part of nsb_list.
no_space_before_chars = re.compile("([?!:;—])")
# Splits a title around brackets and quotation marks (delimiters are kept).
# NOTE(review): this also matches the ASCII double quote, which appears in
# neither sbna_list nor sanb_list.
space_before_not_after_chars = re.compile("([\\(\\)\"“”])")
  155.  
  156. def music_capitalization(title):
  157. #Split with () and “”
  158. splitted_title = space_before_not_after_chars.split(title)
  159. if len(splitted_title) > 1:
  160. #Recall this function with each groups to further split
  161. for i in xrange(0, len(splitted_title)):
  162. if (splitted_title[i] not in sbna_list and
  163. splitted_title[i] not in sanb_list):
  164. splitted_title[i] = music_capitalization(splitted_title[i])
  165. else:
  166. #Split with ?!:; etc...
  167. splitted_title = no_space_before_chars.split(splitted_title[0])
  168. for i in xrange(0, len(splitted_title)):
  169. if splitted_title[i] not in nsb_list and splitted_title[i] != '':
  170. splitted_title[i] = music_capitalize_sentence(splitted_title[i])
  171. #Join the full sentence again taking car of the correct spacings
  172. capitalized_title = ""
  173. for element in splitted_title:
  174. if element in nsb_list:
  175. capitalized_title += "%s " % element
  176. elif element in sbna_list:
  177. capitalized_title += " %s" % element
  178. elif element in sanb_list:
  179. capitalized_title += "%s " % element
  180. else:
  181. capitalized_title += element
  182. return capitalized_title
  183.  
  184. def music_capitalize_sentence(title):
  185. splitted_title = title.lower().split()
  186.  
  187. #versus substitution
  188. substitutions = []
  189. for i in xrange(0, len(splitted_title)):
  190. word = splitted_title[i]
  191. if word == 'versus':
  192. substitutions.append(('versus', 'versus',))
  193. elif word == 'vs.':
  194. splitted_title[i] = 'versus'
  195. substitutions.append(('versus', 'vs.',))
  196. elif word == 'v.':
  197. splitted_title[i] = 'versus'
  198. substitutions.append(('versus', 'v.',))
  199. elif word == 'etcetera':
  200. substitutions.append(('etcetera', 'etcetera',))
  201. elif word == 'etc.':
  202. splitted_title[i] = 'etcetera'
  203. substitutions.append(('etcetera', 'etc.',))
  204. elif word == '.':
  205. substitutions.append(('.', '.',))
  206. elif '-' in word:
  207. splitted_word = word.split('-')
  208. for el in splitted_word:
  209. substitutions.append(('.', '-',))
  210. splitted_title.pop(i)
  211. splitted_title.insert(i, '.'.join(splitted_word))
  212.  
  213. tagged_title = []
  214. for element in tagger.tag(' '.join(splitted_title), expand_contractions_p=1).split():
  215. tagged_title.append(element.split('/'))
  216.  
  217. #Capitalize all nouns, verbs, adverbs, subordinating conjunctions,
  218. #adjectives and pronouns
  219. prev_prev_element = None
  220. prev_element = None
  221. for element in tagged_title:
  222. word = element[0].lower()
  223. tag = element[1]
  224. if '\'' in word:
  225. continue
  226. elif tag.startswith('.'):
  227. if prev_element and len(prev_element[0]) == 1:
  228. prev_element[0] = prev_element[0].capitalize()
  229. elif len(word) == 1:
  230. if (prev_prev_element and len(prev_prev_element[0]) == 1 and
  231. prev_element and prev_element[0] == '.'):
  232. prev_prev_element[0] = prev_prev_element[0].capitalize()
  233. element[0] = element[0].capitalize()
  234. #Determinent except 'a', 'an' and 'the'
  235. elif tag.startswith('DT') or tag.startswith('WDT'):
  236. if word == 'a' or word == 'an' or word == 'the':
  237. element[0] = word
  238. else:
  239. element[0] = element[0].capitalize()
  240. #Coordinating Conjunction
  241. elif tag.startswith('CC'):
  242. #but after a verb is probably an adverb (not always but the tagger is wrong on this)
  243. if word == 'but' and prev_element != None and prev_element[1].startswith('VB'):
  244. element[0] = element[0].capitalize()
  245. elif (word == 'and' or
  246. word == 'but' or
  247. word == 'or' or
  248. word == 'nor' or
  249. word == 'for' or
  250. word == 'yet' or
  251. word == 'so'):
  252. element[0] = word
  253. else:
  254. element[0] = element[0].capitalize()
  255. #Preposition except len(prep) <= 3
  256. elif tag.startswith('IN'):
  257. if len(word) <= 3 or word == 'versus':
  258. element[0] = word
  259. else:
  260. element[0] = element[0].capitalize()
  261. #Adverbs except etcetera
  262. elif tag.startswith('RB') or tag.startswith('WRB'):
  263. if word == 'etcetera':
  264. element[0] = word
  265. else:
  266. element[0] = element[0].capitalize()
  267. #Verbs
  268. elif tag.startswith('VB'):
  269. #'to' used to form an infinitive
  270. if tag == 'VB' and prev_element and prev_element[1] == 'TO':
  271. prev_element[0] = prev_element[0].lower()
  272. element[0] = element[0].capitalize()
  273. else:
  274. element[0] = word.capitalize()
  275. prev_prev_element = prev_element
  276. prev_element = element
  277.  
  278. #Capitalize the first and last word
  279. tagged_title[0][0] = tagged_title[0][0].capitalize()
  280. tagged_title[-1][0] = tagged_title[-1][0].capitalize()
  281.  
  282. #Reverse the substitutions
  283. idx = 0
  284. for sub, orig in substitutions:
  285. for element in tagged_title[idx:]:
  286. idx += 1
  287. word = element[0]
  288. if word.lower() == sub:
  289. element[0] = orig
  290. break
  291.  
  292. #print tagged_title
  293.  
  294. #Rejoin the title without the POS tags
  295. capitalized_title = None
  296. for i in xrange(0, len(tagged_title)):
  297. element = tagged_title[i]
  298. if capitalized_title == None:
  299. capitalized_title = element[0]
  300. elif element[1] in [',', '.']:
  301. capitalized_title += '%s' % element[0]
  302. elif i > 0 and tagged_title[i-1][1] == '.':
  303. capitalized_title += '%s' % element[0]
  304. else:
  305. capitalized_title += ' %s' % element[0]
  306. return capitalized_title.strip()
  307.  
  308.  
  309. def test(shouldbe):
  310. result = music_capitalization(shouldbe)
  311. if result == shouldbe:
  312. print "[OK] - '" + result + "'"
  313. else:
  314. print "[XX] - '" + result + "'\n[->] - '" + shouldbe + "'"
  315. print
  316.  
  317. print
  318. test("Brian Is in the Kitchen")
  319.  
  320. test("Bring It On")
  321. test("You Can Leave Your Hat On")
  322. test("One Is For")
  323. test("And You and I")
  324.  
  325. test("Love Is in the Air")
  326. test("I Am the Walrus")
  327. test("That Was Then, This Is Now")
  328. test("You Are So Beautiful")
  329. test("This Is As Good As It Gets")
  330.  
  331. test("The Man Who Sold the World")
  332. test("In a Safe Place")
  333. test("The Best of The Temptation") #TODO (the -> The)
  334.  
  335. test("Rattle and Hum")
  336. test("It's Now or Never")
  337. test("Nothin' But a Good Time")
  338. test("Life Is But a Dream")
  339. test("Ain't But a Few of Us Left") #TODO but -> But
  340. test("You Are But a Draft, a Long Rehearsal for a Show That Will Never Play")
  341. test("I Know You Are but What Am I") #TODO But -> but
  342. test("I Don't Know What It Is but I Like It") #TODO But -> but
  343.  
  344. test("Live at Woodstock")
  345. test("Face to Face")
  346. test("Death Cab for Cutie")
  347. test("Pretty in Pink")
  348.  
  349. test("Spy vs. Spy")
  350. test("Birds v. Worms")
  351. test("Time After Time etc.") #TODO "et cetera" will pass unoticed
  352. test("Keep On Rockin' in the Free World") #TODO on -> On without in -> In
  353. test("Come In From the Cold") #TODO in -> In
  354.  
  355. test("Nowhere to Run")
  356. test("How to Dismantle an Atomic Bomb")
  357. test("Song I Love to Sing")
  358. test("Reality Used to Be a Friend of Mine")
  359. test("An Ode to Death")
  360.  
  361. test("Otis! The Definitive Otis Redding")
  362. test("In Time: The Best of R.E.M.")
  363. test("I'm Just a Singer (In a Rock 'n' Roll Band)")
  364. test("I'm Just; A Singer (Otis! Rox)")
  365.  
  366. test("The Go-Gos")
  367. test("At the Drive-In")
  368. test("The Boy With the X-Ray Eyes")
  369.  
  370. test("R.E.M")
  371. test("N.W.A")
  372. test("R.O.C.K in the U.S.A")
  373.  
  374. test("Rock 'n' Roll")
  375. test("Will o' the Wisp")
  376. test("Sweet Child o' Mine")
  377.  
  378. test("Nick Cave and The Bad Seeds") #TODO the -> The
  379. test("Elvis Costello and The Attractions") #TODO the -> The
  380. test("Huey Lewis and The News") #TODO the -> The
Add Comment
Please, Sign In to add comment