Advertisement
bnkai

xpath.diff

May 21st, 2020
115
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 2.17 KB | None | 0 0
  1. diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go
  2. index 8c26708..a9f8fc3 100644
  3. --- a/pkg/scraper/xpath.go
  4. +++ b/pkg/scraper/xpath.go
  5. @@ -92,7 +92,7 @@ func (c xpathRegexConfigs) apply(value string) string {
  6.     }
  7.  
  8.     // remove whitespace again
  9. -   value = commonPostProcess(value)
  10. +   value = commonPostProcess(value, false)
  11.  
  12.     return value
  13.  }
  14. @@ -170,7 +170,7 @@ func (c xpathScraperAttrConfig) concatenateResults(nodes []*html.Node) string {
  15.  
  16.     for _, elem := range nodes {
  17.         text := NodeText(elem)
  18. -       text = commonPostProcess(text)
  19. +       text = commonPostProcess(text, true)
  20.  
  21.         result = append(result, text)
  22.     }
  23. @@ -227,7 +227,7 @@ func (c xpathScraperAttrConfig) applySubScraper(value string) string {
  24.             result = subScraper.concatenateResults(found)
  25.         } else {
  26.             result = NodeText(found[0])
  27. -           result = commonPostProcess(result)
  28. +           result = commonPostProcess(result, true)
  29.         }
  30.  
  31.         result = subScraper.postProcess(result)
  32. @@ -246,12 +246,14 @@ func (c xpathScraperAttrConfig) postProcess(value string) string {
  33.     return value
  34.  }
  35.  
  36. -func commonPostProcess(value string) string {
  37. +func commonPostProcess(value string, removeNewlines bool) string {
  38.     value = strings.TrimSpace(value)
  39. -
  40. +   var re *regexp.Regexp
  41.     // remove multiple whitespace and end lines
  42. -   re := regexp.MustCompile("\n")
  43. -   value = re.ReplaceAllString(value, "")
  44. +   if removeNewlines {
  45. +       re = regexp.MustCompile("\n")
  46. +       value = re.ReplaceAllString(value, "")
  47. +   }
  48.     re = regexp.MustCompile("  +")
  49.     value = re.ReplaceAllString(value, " ")
  50.  
  51. @@ -284,7 +286,7 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
  52.             if len(found) > 0 {
  53.                 for i, elem := range found {
  54.                     text := NodeText(elem)
  55. -                   text = commonPostProcess(text)
  56. +                   text = commonPostProcess(text, true)
  57.  
  58.                     ret = ret.setKey(i, k, text)
  59.                 }
  60. @@ -304,7 +306,7 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
  61.                 } else {
  62.                     for i, elem := range found {
  63.                         text := NodeText(elem)
  64. -                       text = commonPostProcess(text)
  65. +                       text = commonPostProcess(text, true)
  66.                         text = attrConfig.postProcess(text)
  67.  
  68.                         ret = ret.setKey(i, k, text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement