Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- English: https://neculaifantanaru.com/en/creating-a-batch-processing-powershell-with-regex-and-html-tags-parsing.html
- Romanian: https://neculaifantanaru.com/creating-a-batch-processing-powershell-with-regex-and-html-tags-parsing.html
# Synchronises duplicated metadata inside every *.html file found directly in
# $sourcedir and writes each result to $resultsdir under the same file name.
#
#   source tag                             copied into
#   -------------------------------------  ------------------------------------
#   <link rel="canonical" href="...">      og:url, JSON "@id"
#   <title>...</title>                     og:title, abstract, keywords,
#                                          Subject, JSON "headline",
#                                          JSON "keywords"
#   <meta name="description" content="">   og:description, JSON "description"
#
# When a source tag is missing (or empty) the corresponding targets are left
# untouched instead of being blanked out.
$sourcedir = "C:\Folder1\"
$resultsdir = "C:\Folder1\"

# Value of a double-quoted HTML attribute: everything up to the closing quote.
# Bounding the match at the quote keeps other attributes on the same line
# (and minified, single-line HTML) intact.
$attrValue = '[^"]*'
# Value of a JSON string: like above, but a backslash-escaped character
# (e.g. \") does not terminate the value.
$jsonValue = '(?:[^"\\]|\\.)*'

# Returns the first substring of $Text matched by $Pattern, or $null when
# nothing matches. Matching is case-insensitive (-match operator default).
function Get-FirstMatchValue {
    param([string]$Text, [string]$Pattern)

    if ($Text -match $Pattern) {
        return $Matches[0]
    }
    return $null
}

# Replaces whatever each pattern in $TargetPatterns matches inside $Text with
# $Value and returns the new text. $Text is returned unchanged when $Value is
# null or empty, so a missing source never erases existing target content.
function Copy-ValueIntoTargets {
    param([string]$Text, [string]$Value, [string[]]$TargetPatterns)

    if ([string]::IsNullOrEmpty($Value)) {
        return $Text
    }

    # In a -replace substitution string '$' introduces group references
    # ($1, $&, $_ ...); doubling it makes the value be inserted literally.
    $literalValue = $Value.Replace('$', '$$')

    foreach ($targetPattern in $TargetPatterns) {
        $Text = $Text -replace $targetPattern, $literalValue
    }
    return $Text
}

Get-ChildItem -Path $sourcedir -Filter *.html -File | ForEach-Object {
    $file = $_

    # NOTE(review): no -Encoding is given, so the shell's default encoding is
    # used for both reading and writing -- confirm it matches the files.
    # -LiteralPath keeps '[' and ']' in file names from being read as wildcards.
    [string]$content = Get-Content -LiteralPath $file.FullName -Raw

    # Copy the canonical URL into the OG:URL tag and the JSON "@id" field.
    $canonicalUrl = Get-FirstMatchValue $content ('(?<=<link rel="canonical" href=")' + '[^"]+')
    $content = Copy-ValueIntoTargets $content $canonicalUrl @(
        ('(?<=<meta property="og:url" content=")' + $attrValue),
        ('(?<="@id": ")' + $jsonValue)
    )

    # Copy the <title> text into OG:TITLE, ABSTRACT, KEYWORDS, SUBJECT and the
    # JSON "headline" / "keywords" fields. The lazy quantifier stops at the
    # first </title> on the line.
    # NOTE(review): the value is inserted verbatim; a title containing a
    # double quote is not escaped for the attribute / JSON context.
    $titleText = Get-FirstMatchValue $content '(?<=<title>).+?(?=</title>)'
    $content = Copy-ValueIntoTargets $content $titleText @(
        ('(?<=<meta property="og:title" content=")' + $attrValue),
        ('(?<=<meta name="abstract" content=")' + $attrValue),
        ('(?<=<meta name="keywords" content=")' + $attrValue),
        ('(?<=<meta name="Subject" content=")' + $attrValue),
        ('(?<="headline": ")' + $jsonValue),
        ('(?<="keywords": ")' + $jsonValue)
    )

    # Copy the meta description into OG:DESCRIPTION and the JSON
    # "description" field.
    $descriptionText = Get-FirstMatchValue $content ('(?<=<meta name="description" content=")' + '[^"]+')
    $content = Copy-ValueIntoTargets $content $descriptionText @(
        ('(?<=<meta property="og:description" content=")' + $attrValue),
        ('(?<="description": ")' + $jsonValue)
    )

    # -NoNewline: the text was read with -Raw and already carries its own line
    # endings; without it every run would append one more newline to the file.
    $targetPath = Join-Path -Path $resultsdir -ChildPath $file.Name
    Set-Content -LiteralPath $targetPath -Value $content -NoNewline
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement