Advertisement
Guest User

Untitled

a guest
Dec 1st, 2012
551
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. Sub LayoutPrep()
  2. '
  3. '  Macro written by DSpider, around mid 2012.
  4. '
  5. '  What this does, is it breaks down a Word document into a squeaky clean document,
  6. '  with its basic formatting intact, ready for re-applying styles or quick styles
  7. '  in Word, or in Adobe InDesign.
  8. '
  9. '    - Bold text
  10. '    - Italic text
  11. '    - Underlined text
  12. '    - Subscripts
  13. '    - Superscripts
  14. '    - Paragraph ending marks
  15. '    - Line Breaks
  16. '    - Page Breaks
  17. '
  18. '
  19. '  Set the desktop variable and the working directory for everything else.
  20. '
  21.     Dim WshShell As Object
  22.     Dim SpecialPath As String
  23.     Set WshShell = CreateObject("WScript.Shell")
  24.     DESKTOP = WshShell.SpecialFolders("Desktop")
  25.     ChangeFileOpenDirectory DESKTOP
  26. '
  27. '  1. Convert numbered and bulleted lists to regular text. Honestly, they're more
  28. '     of a nuisance with scanned text and OCR. Without this they would not make it
  29. '     to the second part because Word treats them (along with footnotes) more like
  30. '     objects.
  31. '
  32.     ActiveDocument.ConvertNumbersToText
  33. '
  34. '
  35. '  2. Convert footnotes to regular text so that they too make it intact.
  36. '
  37.    Dim afootnote As Footnote
  38.    Dim NumberOfFootnotes As Integer
  39.    Dim i As Integer
  40.    Dim aFootnoteReference As String
  41.    Dim aFootnoteRefTag As String
  42.  
  43.  NumberOfFootnotes = ActiveDocument.Footnotes.Count
  44.  For i = NumberOfFootnotes To 1 Step -1
  45.     Set afootnote = ActiveDocument.Footnotes(i)
  46.     afootnote.Range.Select
  47.     Selection.MoveStartWhile Cset:=" " & Chr(9)
  48.     Selection.Cut
  49.     aFootnoteReference = afootnote.Reference.Text
  50.     Select Case aFootnoteReference
  51.     Case Chr(2)
  52.     aFootnoteRefTag = "num"
  53.     Case "*"
  54.     aFootnoteRefTag = "star"
  55.     Case Else
  56.     aFootnoteRefTag = "symbol" _
  57.     & aFootnoteReference & "/FNRef"
  58.  End Select
  59.  afootnote.Reference.Select
  60.  If afootnote.Reference.Text = Chr(40) Then
  61.     With Dialogs(wdDialogInsertSymbol)
  62.         aFootnoteRefTag = _
  63.         "FNSym," & .Font & "," _
  64.         & .CharNum & ""
  65.     End With
  66.  End If
  67.  afootnote.Delete
  68.  Selection.InsertBefore ChrW(9616) _
  69.  & aFootnoteRefTag
  70.  Selection.Collapse (wdCollapseEnd)
  71.  Selection.Paste
  72.  Selection.InsertAfter ChrW(9612)
  73.  Next i
  74. '
  75. '
  76. '  3. Remove all the tab characters by replacing them with the space character.
  77. '     This is because FineReader sometimes adds multiple tabs when you only need
  78. '     one. Easier to spot too when you're assigning styles.
  79. '
  80.     Selection.Find.ClearFormatting
  81.     Selection.Find.Replacement.ClearFormatting
  82.     With Selection.Find
  83.         .Text = "^t"
  84.         .Replacement.Text = " "
  85.         .Forward = True
  86.         .Wrap = wdFindContinue
  87.         .Format = False
  88.         .MatchCase = False
  89.         .MatchWholeWord = False
  90.         .MatchWildcards = False
  91.         .MatchSoundsLike = False
  92.         .MatchAllWordForms = False
  93.     End With
  94.     Selection.Find.Execute Replace:=wdReplaceAll
  95. '
  96. '
  97. '  4. Paragraph-level formatting: Delete the ruler tabs.
  98. '
  99.     Selection.WholeStory
  100.     With Selection.ParagraphFormat
  101.         .SpaceBeforeAuto = False
  102.         .SpaceAfterAuto = False
  103.     End With
  104.     Selection.ParagraphFormat.TabStops.ClearAll
  105.     ActiveDocument.DefaultTabStop = InchesToPoints(0.5)
  106. '
  107. '
  108. '  5. Paragraph-level formatting: Align everything to the left.
  109. '
  110.     With Selection.ParagraphFormat
  111.         .LeftIndent = InchesToPoints(0)
  112.         .RightIndent = InchesToPoints(0)
  113.         .SpaceBefore = 0
  114.         .SpaceBeforeAuto = False
  115.         .SpaceAfter = 0
  116.         .SpaceAfterAuto = False
  117.         .LineSpacingRule = wdLineSpaceSingle
  118.         .Alignment = wdAlignParagraphLeft
  119.         .FirstLineIndent = InchesToPoints(0)
  120.         .OutlineLevel = wdOutlineLevelBodyText
  121.         .CharacterUnitLeftIndent = 0
  122.         .CharacterUnitRightIndent = 0
  123.         .CharacterUnitFirstLineIndent = 0
  124.         .LineUnitBefore = 0
  125.         .LineUnitAfter = 0
  126.     End With
  127. '
  128. '
  129. '  6. Replace the line break character (but keep the paragraph ending marks or
  130. '     else the process would get too slow), then replace the Page Break and
  131. '     Section Break.
  132. '
  133. With ActiveDocument.Content.Find
  134.     .ClearFormatting
  135.     .Replacement.ClearFormatting
  136.     .Forward = True
  137.     .Wrap = wdFindContinue
  138.     .Format = False
  139.     .MatchCase = False
  140.     .MatchWholeWord = False
  141.     .MatchAllWordForms = False
  142.     .MatchSoundsLike = False
  143.     .MatchWildcards = False
  144.     .Text = "^l"
  145.     .Replacement.Text = ChrW(9668)
  146.     .Execute Replace:=wdReplaceAll
  147.     .Text = "^m"
  148.     .Replacement.Text = ChrW(9618)
  149.     .Execute Replace:=wdReplaceAll
  150.     .Text = "^b"
  151.     .Replacement.Text = ChrW(9618)
  152.     .Execute Replace:=wdReplaceAll
  153. End With
  154. '
  155. '
  156. '  7. Italic characters.
  157. '
  158.     Selection.Find.ClearFormatting
  159.     Selection.Find.Font.Italic = True
  160.     Selection.Find.Replacement.ClearFormatting
  161.     With Selection.Find
  162.         .Text = "(?)"
  163.         .Replacement.Text = ChrW(9500) & "\1" & ChrW(9508)
  164.         .Forward = True
  165.         .Wrap = wdFindContinue
  166.         .Format = True
  167.         .MatchCase = False
  168.         .MatchWholeWord = False
  169.         .MatchAllWordForms = False
  170.         .MatchSoundsLike = False
  171.         .MatchWildcards = True
  172.     End With
  173.     Selection.Find.Execute Replace:=wdReplaceAll
  174. '
  175. '
  176. '  8. Bold characters.
  177. '
  178.     Selection.Find.ClearFormatting
  179.     Selection.Find.Font.Bold = True
  180.     Selection.Find.Replacement.ClearFormatting
  181.     With Selection.Find
  182.         .Text = "(?)"
  183.         .Replacement.Text = ChrW(9568) & "\1" & ChrW(9571)
  184.         .Forward = True
  185.         .Wrap = wdFindContinue
  186.         .Format = True
  187.         .MatchCase = False
  188.         .MatchWholeWord = False
  189.         .MatchAllWordForms = False
  190.         .MatchSoundsLike = False
  191.         .MatchWildcards = True
  192.     End With
  193.     Selection.Find.Execute Replace:=wdReplaceAll
  194. '
  195. '
  196. '  9.  Underlined characters.
  197. '
  198.     Selection.Find.ClearFormatting
  199.     Selection.Find.Font.Underline = wdUnderlineSingle
  200.     Selection.Find.Replacement.ClearFormatting
  201.     With Selection.Find
  202.         .Text = "(?)"
  203.         .Replacement.Text = ChrW(9556) & "\1" & ChrW(9559)
  204.         .Forward = True
  205.         .Wrap = wdFindContinue
  206.         .Format = True
  207.         .MatchCase = False
  208.         .MatchWholeWord = False
  209.         .MatchAllWordForms = False
  210.         .MatchSoundsLike = False
  211.         .MatchWildcards = True
  212.     End With
  213.     Selection.Find.Execute Replace:=wdReplaceAll
  214. '
  215. '
  216. '  10. Superscripts.
  217. '
  218.     Selection.Find.ClearFormatting
  219.     With Selection.Find.Font
  220.         .Superscript = True
  221.         .Subscript = False
  222.     End With
  223.     Selection.Find.Replacement.ClearFormatting
  224.     With Selection.Find
  225.         .Text = "(?)"
  226.         .Replacement.Text = ChrW(9560) & "\1" & ChrW(9563)
  227.         .Forward = True
  228.         .Wrap = wdFindContinue
  229.         .Format = True
  230.         .MatchCase = False
  231.         .MatchWholeWord = False
  232.         .MatchAllWordForms = False
  233.         .MatchSoundsLike = False
  234.         .MatchWildcards = True
  235.     End With
  236.     Selection.Find.Execute Replace:=wdReplaceAll
  237. '
  238. '
  239. '  11. Subscripts.
  240. '
  241.     Selection.Find.ClearFormatting
  242.     With Selection.Find.Font
  243.         .Superscript = False
  244.         .Subscript = True
  245.     End With
  246.     Selection.Find.Replacement.ClearFormatting
  247.     With Selection.Find
  248.         .Text = "(?)"
  249.         .Replacement.Text = ChrW(9554) & "\1" & ChrW(9557)
  250.         .Forward = True
  251.         .Wrap = wdFindContinue
  252.         .Format = True
  253.         .MatchCase = False
  254.         .MatchWholeWord = False
  255.         .MatchAllWordForms = False
  256.         .MatchSoundsLike = False
  257.         .MatchWildcards = True
  258.     End With
  259.     Selection.Find.Execute Replace:=wdReplaceAll
  260. '
  261. '
  262. '  12. Save to the desktop as 'Plain Text.txt' and close the file.
  263. '
  264.     ActiveDocument.SaveAs2 FileName:="Plain Text.txt", FileFormat:= _
  265.         wdFormatText, LockComments:=False, Password:="", AddToRecentFiles:=True, _
  266.         WritePassword:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, _
  267.          SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:= _
  268.         False, Encoding:=1200, InsertLineBreaks:=False, AllowSubstitutions:=False _
  269.         , LineEnding:=wdCRLF, CompatibilityMode:=0
  270.     ActiveDocument.Close
  271. '
  272. '
  273. ' --------------------------
  274. '  PART II
  275. ' --------------------------
  276. '
  277. ' Open the broken down text, to be able to restore the formatting.
  278. '
  279.     Documents.Open FileName:="Plain Text.txt", ConfirmConversions:=False, _
  280.         ReadOnly:=False, AddToRecentFiles:=False, PasswordDocument:="", _
  281.         PasswordTemplate:="", Revert:=False, WritePasswordDocument:="", _
  282.         WritePasswordTemplate:="", Format:=wdOpenFormatAuto, XMLTransform:="", _
  283.         Encoding:=1200
  284. '
  285. '
  286. '  1. Delete the useless "Plain Text" style. There's no such thing as plain text
  287. '     in Word. Everything has some kind of formatting applied to it.
  288. '
  289.     ActiveDocument.Styles("Plain Text").Delete
  290. '
  291. '
  292. '  2. Remove all quick styles from the document; default Word templates are
  293. '     unaffected.
  294. '
  295.     Dim s As Style
  296.     For Each s In ActiveDocument.Styles
  297.       If s.Type = wdStyleTypeCharacter Or _
  298.          s.Type = wdStyleTypeParagraph Or _
  299.          s.Type = wdStyleTypeLinked Then
  300.          s.QuickStyle = False
  301.       End If
  302.     Next s
  303. '
  304. '
  305. '  3.  Create a "Text" style and apply it to the document because "Normal" is too
  306. '      mainstream. For various adjustments you can add it to the Quick Styles menu.
  307. '
  308.     ActiveDocument.Styles.Add Name:="Text", Type:=wdStyleTypeParagraph
  309.     ActiveDocument.Content.Style = ActiveDocument.Styles("Text")
  310.         With ActiveDocument.Styles("Text").Font
  311.         .Name = "Times New Roman"
  312.         .Size = 10
  313.         .Bold = False
  314.         .Italic = False
  315.         .Underline = wdUnderlineNone
  316.         .UnderlineColor = wdColorAutomatic
  317.         .StrikeThrough = False
  318.         .DoubleStrikeThrough = False
  319.         .Outline = False
  320.         .Emboss = False
  321.         .Shadow = False
  322.         .Hidden = False
  323.         .SmallCaps = False
  324.         .AllCaps = False
  325.         .Color = wdColorAutomatic
  326.         .Engrave = False
  327.         .Superscript = False
  328.         .Subscript = False
  329.         .Scaling = 100
  330.         .Kerning = 0
  331.         .Animation = wdAnimationNone
  332.         .Ligatures = wdLigaturesNone
  333.         .NumberSpacing = wdNumberSpacingDefault
  334.         .NumberForm = wdNumberFormDefault
  335.         .StylisticSet = wdStylisticSetDefault
  336.         .ContextualAlternates = 0
  337.     End With
  338.     With ActiveDocument.Styles("Text")
  339.         .AutomaticallyUpdate = False
  340.         .BaseStyle = ""
  341.         .NextParagraphStyle = "Text"
  342.     End With
  343. '
  344. '
  345. '  4. Restore subscripts.
  346. '
  347.     Selection.Find.ClearFormatting
  348.     Selection.Find.Replacement.ClearFormatting
  349.     With Selection.Find.Replacement.Font
  350.         .Superscript = False
  351.         .Subscript = True
  352.     End With
  353.     With Selection.Find
  354.         .Text = ChrW(9554) & "(?)" & ChrW(9557)
  355.         .Replacement.Text = "\1"
  356.         .Forward = True
  357.         .Wrap = wdFindContinue
  358.         .Format = True
  359.         .MatchCase = False
  360.         .MatchWholeWord = False
  361.         .MatchAllWordForms = False
  362.         .MatchSoundsLike = False
  363.         .MatchWildcards = True
  364.     End With
  365.     Selection.Find.Execute Replace:=wdReplaceAll
  366. '
  367. '
  368. '  5. Restore superscripts.
  369. '
  370.     Selection.Find.ClearFormatting
  371.     Selection.Find.Replacement.ClearFormatting
  372.     With Selection.Find.Replacement.Font
  373.         .Superscript = True
  374.         .Subscript = False
  375.     End With
  376.     With Selection.Find
  377.         .Text = ChrW(9560) & "(?)" & ChrW(9563)
  378.         .Replacement.Text = "\1"
  379.         .Forward = True
  380.         .Wrap = wdFindContinue
  381.         .Format = True
  382.         .MatchCase = False
  383.         .MatchWholeWord = False
  384.         .MatchAllWordForms = False
  385.         .MatchSoundsLike = False
  386.         .MatchWildcards = True
  387.     End With
  388.     Selection.Find.Execute Replace:=wdReplaceAll
  389. '
  390. '
  391. '  6. Replace paragraph endings temporarily so that they too can receive bold and
  392. '     italic attributes. It's a little difficult to explain but just know that it's
  393. '     needed... For example if you have a block of text with italic (or bold)
  394. '     attributes, the paragraph ending marks (¶) will not receive the attribute and
  395. '     every line will have separate tags instead of treating it like a whole.
  396. '
  397. '     This is the reason the "decoding" process takes such a long time. Because you
  398. '     basically search and replace throughout a SINGLE 1+ MB paragraph (depending
  399. '     on the complexity of the book.
  400. '
  401.     With ActiveDocument.Content.Find
  402.         .ClearFormatting
  403.         .Replacement.ClearFormatting
  404.         .Forward = True
  405.         .Wrap = wdFindContinue
  406.         .Format = False
  407.         .MatchCase = False
  408.         .MatchWholeWord = False
  409.         .MatchAllWordForms = False
  410.         .MatchSoundsLike = False
  411.         .MatchWildcards = False
  412.         .Text = "^p"
  413.         .Replacement.Text = ChrW(9608)
  414.         .Execute Replace:=wdReplaceAll
  415.     End With
  416. '
  417. '
  418. '  7. Restore underlined characters.
  419. '
  420.     Selection.Find.ClearFormatting
  421.     Selection.Find.Replacement.ClearFormatting
  422.     Selection.Find.Replacement.Font.Underline = wdUnderlineSingle
  423.     With Selection.Find
  424.         .Text = ChrW(9556) & "(?)" & ChrW(9559)
  425.         .Replacement.Text = "\1"
  426.         .Forward = True
  427.         .Wrap = wdFindContinue
  428.         .Format = True
  429.         .MatchCase = False
  430.         .MatchWholeWord = False
  431.         .MatchAllWordForms = False
  432.         .MatchSoundsLike = False
  433.         .MatchWildcards = True
  434.     End With
  435.     Selection.Find.Execute Replace:=wdReplaceAll
  436. '
  437. '
  438. '  8. Restore bold characters.
  439. '
  440.     Selection.Find.ClearFormatting
  441.     Selection.Find.Replacement.ClearFormatting
  442.     Selection.Find.Replacement.Font.Bold = True
  443.     With Selection.Find
  444.         .Text = ChrW(9568) & "(?)" & ChrW(9571)
  445.         .Replacement.Text = "\1"
  446.         .Forward = True
  447.         .Wrap = wdFindContinue
  448.         .Format = True
  449.         .MatchCase = False
  450.         .MatchWholeWord = False
  451.         .MatchAllWordForms = False
  452.         .MatchSoundsLike = False
  453.         .MatchWildcards = True
  454.     End With
  455.     Selection.Find.Execute Replace:=wdReplaceAll
  456. '
  457. '
  458. '  9. Restore italic characters.
  459. '
  460.     Selection.Find.ClearFormatting
  461.     Selection.Find.Replacement.ClearFormatting
  462.     Selection.Find.Replacement.Font.Italic = True
  463.     With Selection.Find
  464.         .Text = ChrW(9500) & "(?)" & ChrW(9508)
  465.         .Replacement.Text = "\1"
  466.         .Forward = True
  467.         .Wrap = wdFindContinue
  468.         .Format = True
  469.         .MatchCase = False
  470.         .MatchWholeWord = False
  471.         .MatchAllWordForms = False
  472.         .MatchSoundsLike = False
  473.         .MatchWildcards = True
  474.     End With
  475.     Selection.Find.Execute Replace:=wdReplaceAll
  476. '
  477. '
  478. '  10. Restore paragraph ending marks, line breaks and page breaks.
  479. '
  480.     With ActiveDocument.Content.Find
  481.         .ClearFormatting
  482.         .Replacement.ClearFormatting
  483.         .Forward = True
  484.         .Wrap = wdFindContinue
  485.         .Format = False
  486.         .MatchCase = False
  487.         .MatchWholeWord = False
  488.         .MatchAllWordForms = False
  489.         .MatchSoundsLike = False
  490.         .MatchWildcards = False
  491.         .Text = ChrW(9608)
  492.         .Replacement.Text = "^p"
  493.         .Execute Replace:=wdReplaceAll
  494.         .Text = ChrW(9668)
  495.         .Replacement.Text = "^l"
  496.         .Execute Replace:=wdReplaceAll
  497.         .Text = ChrW(9618)
  498.         .Replacement.Text = "^m"
  499.         .Execute Replace:=wdReplaceAll
  500.     End With
  501. '
  502. '
  503. '  11. Save to the desktop as 'Formatted Text.rtf'. RTF chosen because it has the
  504. '      highest compatibility with InDesign, and possibly other word processors.
  505. '
  506.     ActiveDocument.SaveAs2 FileName:="Formatted Text.rtf", FileFormat:= _
  507.         wdFormatRTF, LockComments:=False, Password:="", AddToRecentFiles:=True, _
  508.         WritePassword:="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, _
  509.          SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:= _
  510.         False, CompatibilityMode:=0
  511.     ActiveDocument.Close
  512. '
  513. '
  514. ' 12. Clean-up, and a simple prompt message when it's done.
  515. '
  516.     Kill "Plain Text.txt"
  517.     Documents.Open FileName:="Formatted Text.rtf"
  518.     MsgBox ("Done!")
  519.    
  520. End Sub
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement