Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# dedupe - collapse runs of identical CONSECUTIVE lines, keeping the first
# line of each run.
#
# Reads stdin, writes stdout. Trailing spaces and tabs are stripped from
# every line before comparing (and in the output), so lines that differ
# only by invisible trailing whitespace count as duplicates.
# Use after norepeat() to dedupe blank lines.
#
# my answer to https://stackoverflow.com/questions/1444406/how-to-delete-duplicate-lines-in-a-file-without-sorting-it-in-unix/63322817#63322817
dedupe() {
    sed -E '
        # Trim the line that starts the cycle. Without this the very first
        # input line (and a one-line input) would never be trimmed, because
        # the trim below only reaches the line appended by N.
        s/[[:blank:]]+$//
        $!{
            # Pull in the next line and trim it as well. [[:blank:]] is used
            # because a backslash-t inside brackets is a GNU-only escape.
            N
            s/[[:blank:]]+$//
            # Print the first line unless the second one repeats it.
            /^(.*)\n\1$/!P
            # Drop the first line and restart the cycle with the second.
            D
        }
        # On the last line nothing above prints; the automatic end-of-cycle
        # print emits it.
    '
}
# norepeat - delete every later repeat of a line, consecutive or not,
# keeping the FIRST occurrence and the original order.
#
# Reads stdin, writes stdout. Trailing spaces and tabs are trimmed before
# comparing (and in the output). Blank lines are never treated as
# duplicates of each other, but a run of blank lines is squeezed to one.
# All lines printed so far are kept in the sed hold space, newest first.
norepeat() {
    sed -n -E '
        # [[:blank:]] rather than a backslash-t in brackets, which only
        # GNU sed reads as a tab.
        s/[[:blank:]]+$//
        # Pattern space becomes: current line, newline, history.
        G
        # Current line is blank and so was the most recently kept line.
        /^(\n){2,}/d
        # Current line already appears in the history as a WHOLE line.
        # The newline right after the capture pins group 1 to the complete
        # current line, so a mere prefix of it cannot match.
        /^([^\n]+)\n(.*\n)?\1(\n|$)/d
        # New line: remember it and print it.
        h
        P
    '
}
# lastrepeat - delete every earlier repeat of a line, consecutive or not,
# keeping the LAST occurrence of each line.
#
# Reads stdin, writes stdout. Trailing spaces and tabs are trimmed before
# comparing (and in the output). Runs of blank lines are squeezed to one.
# The whole input is accumulated in the sed hold space and printed once,
# when the last input line has been processed.
lastrepeat() {
    sed -n -E '
        # [[:blank:]] rather than a backslash-t in brackets, which only
        # GNU sed reads as a tab.
        s/[[:blank:]]+$//
        /^$/{
            # Blank line: just append it to the accumulated text.
            H
            $!d
            # The input ENDS with a blank line. Load the accumulated text
            # and go print it; deleting here would discard all output.
            g
            b flush
        }
        # Pattern space becomes: current line, newline, accumulated text.
        G
        # Delete the previous occurrence of the current line if found.
        # Group 2 is followed directly by a newline, so it is always the
        # complete current line and never just a prefix of it.
        s/^(([^\n]+)\n)(.*)\n\2(\n.*|$)/\1\3\4/
        # After searching for a previous repeat, move the current line to
        # the end of the accumulated text.
        s/^([^\n]+)(\n)(.*)/\3\2\1/
        $!{
            h
            d
        }
        :flush
        # Squeeze blank lines to one, including a trailing run.
        s/(\n\n)\n+/\1/g
        s/(\n)\n+$/\1/
        # Drop the separator newline that starts the accumulated text.
        s/^\n//
        p
    '
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement