Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/pkg/scraper/xpath.go b/pkg/scraper/xpath.go
- index 8c26708..a9f8fc3 100644
- --- a/pkg/scraper/xpath.go
- +++ b/pkg/scraper/xpath.go
- @@ -92,7 +92,7 @@ func (c xpathRegexConfigs) apply(value string) string {
- }
- // remove whitespace again
- - value = commonPostProcess(value)
- + value = commonPostProcess(value, false)
- return value
- }
- @@ -170,7 +170,7 @@ func (c xpathScraperAttrConfig) concatenateResults(nodes []*html.Node) string {
- for _, elem := range nodes {
- text := NodeText(elem)
- - text = commonPostProcess(text)
- + text = commonPostProcess(text, true)
- result = append(result, text)
- }
- @@ -227,7 +227,7 @@ func (c xpathScraperAttrConfig) applySubScraper(value string) string {
- result = subScraper.concatenateResults(found)
- } else {
- result = NodeText(found[0])
- - result = commonPostProcess(result)
- + result = commonPostProcess(result, true)
- }
- result = subScraper.postProcess(result)
- @@ -246,12 +246,14 @@ func (c xpathScraperAttrConfig) postProcess(value string) string {
- return value
- }
- -func commonPostProcess(value string) string {
- +func commonPostProcess(value string, removeNewlines bool) string {
- value = strings.TrimSpace(value)
- -
- + var re *regexp.Regexp
- // remove multiple whitespace and end lines
- - re := regexp.MustCompile("\n")
- - value = re.ReplaceAllString(value, "")
- + if removeNewlines {
- + re = regexp.MustCompile("\n")
- + value = re.ReplaceAllString(value, "")
- + }
- re = regexp.MustCompile(" +")
- value = re.ReplaceAllString(value, " ")
- @@ -284,7 +286,7 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
- if len(found) > 0 {
- for i, elem := range found {
- text := NodeText(elem)
- - text = commonPostProcess(text)
- + text = (text)
- ret = ret.setKey(i, k, text)
- }
- @@ -304,7 +306,7 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
- } else {
- for i, elem := range found {
- text := NodeText(elem)
- - text = commonPostProcess(text)
- + text = commonPostProcess(text, true)
- text = attrConfig.postProcess(text)
- ret = ret.setKey(i, k, text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement