eBook Formatting Word Macro

Since there are plenty of eBooks with bad formatting, I found it a hassle to run search and replace all the time. So I created this macro for formatting eBook. You're welcome to comment and ask for additional features. Anyway please be gentle, I'm a Java programmer so I don't really know to navigate VB beautifully.

Here is the code :
   1: ' Main Macro:
   2: '
   3: ' CleanAndFormat()
   4: ' CleanOCR()
   5: ' RemoveAllHyperlinks()
   6: ' Other macros are for supporting these there main macros.
   7:  
   8: ' USE THIS FOR CLEANING AND FORMATTING EBOOK DOCUMENT WITH A GOOD PARAGRAPH LINE.
   9: '
  10: ' Version change log:
  11: ' v2.1, July 30 - Added Indent right and left indent to 0 and LineSpacing = LinesToPoints(1)
  12: ' v2.2, July 30 - Added font size changes to all 12, because some bad document have mixed up font size + Set all font color to black.
  13: ' v2.3, August 01 - Minor changes, added join pages adjusment for font that is not Bold. (For skipping title or heading)
  14: ' v2.4, Aug 01 - Minor changes,  Remove Paragraph Justify since mobipocket use automatic justification
  15: ' v2.5, Aug 02 - Added Single Non Breaking Space in for Convertion to Single Space.
  16: '                                          - Added a conversion from ^s into whitespace before conversion into single whitespace.
  17: ' v2.6, Aug 07 - Added comma as a clause for join if a paragraph end or start with a comma.
  18:  
  19: Sub CleanAndFormat()
  20:     ' Simple macro.
  21:     Application.Run MacroName:="RemovePageAndSectionBreak"
  22:     
  23:     ' Convert manual line break to paragraph break.
  24:     Application.Run MacroName:="CovertLineBreakToParagraphBreak"
  25:     ' Convert any space into single space.
  26:     Application.Run MacroName:="ConvertAllSpaceToSingleSpace"
  27:     
  28:     ' Remove all space before and after paragraph so that later on it can be use with JoinPages more successfully.
  29:     Application.Run MacroName:="RemoveAnySpaceBeforeOrAfterParagraph"
  30:     ' Remove all extras paragraph in document so that join pages will be more successful.
  31:     Application.Run MacroName:="ConvertAllToSingleParagraph"
  32:     
  33:     ' Added >< on the document after each join for manual reviewing.
  34:     Application.Run MacroName:="JoinPages"
  35:     
  36:     ' Simple formatting.
  37:     Application.Run MacroName:="FormatDocument"
  38: End Sub
  39:  
  40: ' USE THIS ONLY ON OCR DRAFT DOCUMENT.
  41: ' Use this macro for cleaning a document that have double paragraph for each new line and single paragraph for line break.
  42: ' v1.0 - Original OCR.
  43: ' v1.1 - Added convert all ^p three or more into ^p^p, before joining hard break line.
  44:  
  45: Sub CleanOCR()
  46:     Application.Run MacroName:="CleanOCRStepOne"
  47:     Application.Run MacroName:="CleanOCRStepTwo"
  48: End Sub
  49:  
  50: ' Removing hyperlinks for some eBook that have a lot of broken links since it only include a single HTML
  51: ' or Merging file using toc but didn't not update the hyperlinks manually.
  52: Sub RemoveAllHyperlinks()
  53: Dim i As Integer
  54:  
  55: For i = ActiveDocument.Hyperlinks.Count To 1 Step -1
  56:  
  57:     ActiveDocument.Hyperlinks(i).Delete
  58:  
  59: Next i
  60: End Sub
  61: ' - Convert all whitespace and non breaking space into single whitespace
  62: ' - Remove any whitespace before or after a paragraph.
  63: ' - Conversion into a single paragraph.
  64: Sub Tidy()
  65:     Application.Run MacroName:="ConvertAllSpaceToSingleSpace"
  66:     Application.Run MacroName:="RemoveAnySpaceBeforeOrAfterParagraph"
  67:     Application.Run MacroName:="ConvertAllToSingleParagraph"
  68: End Sub
  69:  
  70: ' --------------------------------------------------------------------------
  71: ' BELOW IS INDIVIDUAL SUB FOR RUNNING SPECIFIC MACRO.
  72: ' --------------------------------------------------------------------------
  73:  
  74:  
  75: Sub CleanOCRStepTwo()
  76:     ' Convert any space into single space.
  77:     Application.Run MacroName:="ConvertAllSpaceToSingleSpace"
  78:     
  79:     ' Need to run this method one more time because in step one, I can only remove 2 ^p and it will added space after ^p.
  80:     ' Remove all space before and after paragraph so that later on it can be use with JoinPages more successfully.
  81:     Application.Run MacroName:="RemoveAnySpaceBeforeOrAfterParagraph"
  82:     
  83:     ' Remove all extras paragraph in document so that join pages will be more successful.
  84:     Application.Run MacroName:="ConvertAllToSingleParagraph"
  85:     ' Need more attention.
  86:     Application.Run MacroName:="JoinPages"
  87:     
  88:     ' Simple formatting.
  89:     Application.Run MacroName:="FormatDocument"
  90: End Sub
  91:  
  92: Sub CleanOCRStepOne()
  93:     Application.Run MacroName:="RemovePageAndSectionBreak"
  94:     Application.Run MacroName:="CovertLineBreakToParagraphBreak"
  95:     Application.Run MacroName:="RemoveAnySpaceBeforeOrAfterParagraph"
  96:     
  97:     Selection.Find.ClearFormatting
  98:     Selection.Find.Replacement.ClearFormatting
  99:     
 100:     With Selection.Find
 101:         .Text = "^13{3,}"
 102:         .Replacement.Text = "^p^p"
 103:         .Forward = True
 104:         .Wrap = wdFindContinue
 105:         .Format = False
 106:         .MatchCase = False
 107:         .MatchWholeWord = False
 108:         .MatchWildcards = True
 109:         .MatchSoundsLike = False
 110:         .MatchAllWordForms = False
 111:     End With
 112:     Selection.Find.Execute Replace:=wdReplaceAll
 113:     
 114:     With Selection.Find
 115:         .Text = "^p^p"
 116:         .Replacement.Text = "{br}"
 117:         .Forward = True
 118:         .Wrap = wdFindContinue
 119:         .Format = False
 120:         .MatchCase = False
 121:         .MatchWholeWord = False
 122:         .MatchWildcards = False
 123:         .MatchSoundsLike = False
 124:         .MatchAllWordForms = False
 125:     End With
 126:     Selection.Find.Execute Replace:=wdReplaceAll
 127:     
 128:     With Selection.Find
 129:         .Text = "^p"
 130:         .Replacement.Text = " "
 131:         .Forward = True
 132:         .Wrap = wdFindContinue
 133:         .Format = False
 134:         .MatchCase = False
 135:         .MatchWholeWord = False
 136:         .MatchWildcards = False
 137:         .MatchSoundsLike = False
 138:         .MatchAllWordForms = False
 139:     End With
 140:     Selection.Find.Execute Replace:=wdReplaceAll
 141:     
 142:     With Selection.Find
 143:         .Text = "{br}"
 144:         .Replacement.Text = "^p"
 145:         .Forward = True
 146:         .Wrap = wdFindContinue
 147:         .Format = False
 148:         .MatchCase = False
 149:         .MatchWholeWord = False
 150:         .MatchWildcards = False
 151:         .MatchSoundsLike = False
 152:         .MatchAllWordForms = False
 153:     End With
 154:     Selection.Find.Execute Replace:=wdReplaceAll
 155:     
 156: End Sub
 157: ' Important to support join page.
 158: Sub RemovePageAndSectionBreak()
 159:     Selection.Find.ClearFormatting
 160:     Selection.Find.Replacement.ClearFormatting
 161:     
 162:     With Selection.Find
 163:         .Text = "^b"
 164:         .Replacement.Text = "^p"
 165:         .Forward = True
 166:         .Wrap = wdFindContinue
 167:         .Format = False
 168:         .MatchCase = False
 169:         .MatchWholeWord = False
 170:         .MatchWildcards = False
 171:         .MatchSoundsLike = False
 172:         .MatchAllWordForms = False
 173:     End With
 174:     Selection.Find.Execute Replace:=wdReplaceAll
 175:     
 176:     With Selection.Find
 177:         .Text = "^m"
 178:         .Replacement.Text = "^p"
 179:         .Forward = True
 180:         .Wrap = wdFindContinue
 181:         .Format = False
 182:         .MatchCase = False
 183:         .MatchWholeWord = False
 184:         .MatchWildcards = False
 185:         .MatchSoundsLike = False
 186:         .MatchAllWordForms = False
 187:     End With
 188:     Selection.Find.Execute Replace:=wdReplaceAll
 189:  
 190: End Sub
 191: ' Important to support join page.
 192: Sub RemoveAnySpaceBeforeOrAfterParagraph()
 193:     Selection.Find.ClearFormatting
 194:     Selection.Find.Replacement.ClearFormatting
 195:     
 196:     With Selection.Find
 197:         .Text = "[ ^s]{1,}^13"
 198:         .Replacement.Text = "^p"
 199:         .Forward = True
 200:         .Wrap = wdFindContinue
 201:         .Format = False
 202:         .MatchCase = False
 203:         .MatchWholeWord = False
 204:         .MatchAllWordForms = False
 205:         .MatchSoundsLike = False
 206:         .MatchWildcards = True
 207:     End With
 208:     Selection.Find.Execute Replace:=wdReplaceAll
 209:     
 210:     With Selection.Find
 211:         .Text = "^13[ ^s]{1,}"
 212:         .Replacement.Text = "^p"
 213:         .Forward = True
 214:         .Wrap = wdFindContinue
 215:         .Format = False
 216:         .MatchCase = False
 217:         .MatchWholeWord = False
 218:         .MatchAllWordForms = False
 219:         .MatchSoundsLike = False
 220:         .MatchWildcards = True
 221:     End With
 222:     Selection.Find.Execute Replace:=wdReplaceAll
 223:     
 224: End Sub
 225: ' Important to support join pages.
 226: Sub ConvertAllSpaceToSingleSpace()
 227:     Selection.Find.ClearFormatting
 228:     Selection.Find.Replacement.ClearFormatting
 229:     
 230:     With Selection.Find
 231:         .Text = "^s"
 232:         .Replacement.Text = " "
 233:         .Forward = True
 234:         .Wrap = wdFindContinue
 235:         .Format = False
 236:         .MatchCase = False
 237:         .MatchWholeWord = False
 238:         .MatchAllWordForms = False
 239:         .MatchSoundsLike = False
 240:         .MatchWildcards = False
 241:     End With
 242:     Selection.Find.Execute Replace:=wdReplaceAll
 243:     
 244:     With Selection.Find
 245:         .Text = " {2,}"
 246:         .Replacement.Text = " "
 247:         .Forward = True
 248:         .Wrap = wdFindContinue
 249:         .Format = False
 250:         .MatchCase = False
 251:         .MatchWholeWord = False
 252:         .MatchAllWordForms = False
 253:         .MatchSoundsLike = False
 254:         .MatchWildcards = True
 255:     End With
 256:     Selection.Find.Execute Replace:=wdReplaceAll
 257:     
 258:     With Selection.Find
 259:         .Text = "  "
 260:         .Replacement.Text = " "
 261:         .Forward = True
 262:         .Wrap = wdFindContinue
 263:         .Format = False
 264:         .MatchCase = False
 265:         .MatchWholeWord = False
 266:         .MatchAllWordForms = False
 267:         .MatchSoundsLike = False
 268:         .MatchWildcards = False
 269:     End With
 270:     Selection.Find.Execute Replace:=wdReplaceAll
 271:  
 272: End Sub
 273: ' Joining pages where in original book it's separated and in eBook version
 274: ' the paragraph should be together not in different paragraph.
 275: Sub JoinPages()
 276:     Selection.Find.ClearFormatting
 277:     Selection.Find.Replacement.ClearFormatting
 278:     
 279:     ' Needed for skipping headings, title and any bold text
 280:     Selection.Find.Font.Bold = False
 281:     
 282:     ' Step 1: join lower case letter in the first line that have a line break before it.
 283:     With Selection.Find
 284:         .Text = "^13[a-z,]"
 285:         .Replacement.Text = " ><^&"
 286:         .Forward = True
 287:         .Wrap = wdFindContinue
 288:         .Format = True
 289:         .MatchCase = False
 290:         .MatchWholeWord = False
 291:         .MatchAllWordForms = False
 292:         .MatchSoundsLike = False
 293:         .MatchWildcards = True
 294:     End With
 295:     Selection.Find.Execute Replace:=wdReplaceAll
 296:     
 297:     With Selection.Find
 298:         .Text = " [>][<]*^13"
 299:         .Replacement.Text = " >< "
 300:         .Forward = True
 301:         .Wrap = wdFindContinue
 302:         .Format = True
 303:         .MatchCase = False
 304:         .MatchWholeWord = False
 305:         .MatchAllWordForms = False
 306:         .MatchSoundsLike = False
 307:         .MatchWildcards = True
 308:     End With
 309:     Selection.Find.Execute Replace:=wdReplaceAll
 310:     
 311:     ' Step 2: join a letter that has a lower case letter before its paragraph
 312:     With Selection.Find
 313:         .Text = "[,a-z]^13"
 314:         'foo^p><
 315:         .Replacement.Text = "^&>< "
 316:         .Forward = True
 317:         .Wrap = wdFindContinue
 318:         .Format = True
 319:         .MatchCase = False
 320:         .MatchWholeWord = False
 321:         .MatchAllWordForms = False
 322:         .MatchSoundsLike = False
 323:         .MatchWildcards = True
 324:     End With
 325:     Selection.Find.Execute Replace:=wdReplaceAll
 326:     
 327:     With Selection.Find
 328:         .Text = "^13[>][<] "
 329:         .Replacement.Text = " >< "
 330:         .Forward = True
 331:         .Wrap = wdFindContinue
 332:         .Format = True
 333:         .MatchCase = False
 334:         .MatchWholeWord = False
 335:         .MatchAllWordForms = False
 336:         .MatchSoundsLike = False
 337:         .MatchWildcards = True
 338:     End With
 339:     Selection.Find.Execute Replace:=wdReplaceAll
 340:  
 341: End Sub
 342:  
 343: Sub FormatDocument()
 344:     Selection.WholeStory
 345:     
 346:     ' Remove Paragraph Justify since mobipocket use automatic justification
 347:     ' Selection.ParagraphFormat.Alignment = wdAlignParagraphJustify
 348:     
 349:     
 350:     ' Change QuickStyles to eBook
 351:     ActiveDocument.ApplyQuickStyleSet ("eBook 2")
 352:     
 353:     ' Font Styles -- Important to set font size 12 since some eBook contains normal text but different size which is a mistake.
 354:     Selection.Font.Name = "Cambria"
 355:     Selection.Font.Size = 12
 356:     Selection.Font.Color = -587137025
 357:     
 358:     ' Important: some eBook contains mixed up line spacing in normal paragraph, so I need to set it to 1.
 359:     Selection.ParagraphFormat.LineSpacing = LinesToPoints(1)
 360:     
 361:     With Selection.PageSetup
 362:         .LineNumbering.Active = False
 363:         .Orientation = wdOrientPortrait
 364:         .TopMargin = InchesToPoints(0.5)
 365:         .BottomMargin = InchesToPoints(0.5)
 366:         .LeftMargin = InchesToPoints(0.5)
 367:         .RightMargin = InchesToPoints(0.5)
 368:         .Gutter = InchesToPoints(0)
 369:         .HeaderDistance = InchesToPoints(0.5)
 370:         .FooterDistance = InchesToPoints(0.5)
 371:         .PageWidth = InchesToPoints(8.5)
 372:         .PageHeight = InchesToPoints(11)
 373:         .FirstPageTray = wdPrinterDefaultBin
 374:         .OtherPagesTray = wdPrinterDefaultBin
 375:         .SectionStart = wdSectionNewPage
 376:         .OddAndEvenPagesHeaderFooter = False
 377:         .DifferentFirstPageHeaderFooter = False
 378:         .VerticalAlignment = wdAlignVerticalTop
 379:         .SuppressEndnotes = False
 380:         .MirrorMargins = False
 381:         .TwoPagesOnOne = False
 382:         .BookFoldPrinting = False
 383:         .BookFoldRevPrinting = False
 384:         .BookFoldPrintingSheets = 1
 385:         .GutterPos = wdGutterPosLeft
 386:     End With
 387:     
 388:     With Selection.ParagraphFormat
 389:         .SpaceBeforeAuto = False
 390:         .SpaceAfterAuto = False
 391:         .FirstLineIndent = InchesToPoints(0.13)
 392:         .RightIndent = InchesToPoints(0)
 393:         .LeftIndent = InchesToPoints(0)
 394:     End With
 395:  
 396:     WordBasic.OpenOrCloseParaBelow
 397:     WordBasic.OpenOrCloseParaAbove
 398:     WordBasic.OpenOrCloseParaBelow
 399:     WordBasic.OpenOrCloseParaAbove
 400:  
 401: End Sub
 402: ' Important to support join pages.
 403: Sub ConvertAllToSingleParagraph()
 404:  
 405:     Selection.Find.ClearFormatting
 406:     Selection.Find.Replacement.ClearFormatting
 407:     With Selection.Find
 408:         .Text = "^13{2,}"
 409:         .Replacement.Text = "^p"
 410:         .Forward = True
 411:         .Wrap = wdFindContinue
 412:         .Format = False
 413:         .MatchCase = False
 414:         .MatchWholeWord = False
 415:         .MatchAllWordForms = False
 416:         .MatchSoundsLike = False
 417:         .MatchWildcards = True
 418:     End With
 419:     Selection.Find.Execute Replace:=wdReplaceAll
 420:  
 421: End Sub
 422: ' Important to support join pages.
 423: Sub CovertLineBreakToParagraphBreak()
 424:  
 425:     Selection.Find.ClearFormatting
 426:     Selection.Find.Replacement.ClearFormatting
 427:     With Selection.Find
 428:         .Text = "^l"
 429:         .Replacement.Text = "^p"
 430:         .Forward = True
 431:         .Wrap = wdFindContinue
 432:         .Format = False
 433:         .MatchCase = False
 434:         .MatchWholeWord = False
 435:         .MatchWildcards = False
 436:         .MatchSoundsLike = False
 437:         .MatchAllWordForms = False
 438:     End With
 439:     Selection.Find.Execute Replace:=wdReplaceAll
 440: End Sub
 441: