
Untitled
By: a guest on
May 5th, 2012 | syntax:
None | size: 2.78 KB | hits: 6 | expires: Never
Read a large file and output sections matching multiple parameters
use strict;
use warnings;

# Parse one "#FIELD key=value" line.
# Returns (key, value) with surrounding double quotes stripped from the value,
# or an empty list when the line is not a field line or has no usable value.
sub _parse_field_line {
    my ($line) = @_;

    # Trailing whitespace (newline, stray CR) is dropped so that the closing
    # quote really is the last character of the captured text.
    my ($pair) = $line =~ /^#FIELD (.+?)\s*$/ or return;

    # Limit of 2 keeps any further "=" (e.g. in a URL query string) in the value.
    my ($key, $val) = split /=/, $pair, 2;
    return unless defined $val;

    $val =~ s/^"|"$//g;           # strip quotes
    return unless length $val;    # no empty values, but "0" is a valid value

    return ($key, $val);
}

# Decide whether a completed record should be printed.
# Takes a reference to the record's field hash and returns 1 when the
# LOCATION value does not contain the MyField value, 0 otherwise.
# A record missing either field cannot be compared and is reported (returns 1).
sub _location_lacks_myfield {
    my ($fields) = @_;
    my $location = $fields->{'LOCATION'};
    my $wanted   = $fields->{'MyField'};

    return 1 unless defined $location && defined $wanted;

    # Literal substring test: MyField is data, not a regular expression.
    return index($location, $wanted) < 0 ? 1 : 0;
}

my @record;    # lines of the record currently being read
my %f;         # fields of the record currently being read

while (my $line = <DATA>) {
    # Only look at lines from a "#START " line through the next "#END" line.
    next unless $line =~ /^#START / .. $line =~ /^#END */;

    if ($line =~ /^#START /) {
        @record = ();    # reset
        %f      = ();
    }
    push @record, $line;

    if ($line =~ /^#END */) {    # check and print
        print @record if _location_lacks_myfield(\%f);
        next;
    }

    # add fields to hash
    my ($key, $val) = _parse_field_line($line) or next;
    $f{$key} = $val;
}
__DATA__
#START Descriptor
#FIELD LOCATION="http://path.to/file/here&Value=FOO&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END
#START Descriptor
#FIELD LOCATION=http://path.to/file/here&Value=BAR&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END
# Read one "#START ... #END" record at a time ($/ = "#END\n") and print the
# records that have a MyField value which does not appear as Value=<that value>.
# \Q...\E makes the captured value match literally; records without a MyField
# line are skipped instead of being compared against a stale $1.
perl -ne 'BEGIN { $/ = "#END\n" }' -e 'print if /MyField="(.*?)"/ && !/Value=\Q$1\E/' <file >newfile
gawk '
# Each record is one "#START ... #END" block (RS) and each field is one line
# of that block (FS), so $2 is the second line and $4 the fourth.
# Mismatching records are written to badrec.file, terminated by ORS.
{
    # Skip records whose second line is not the LOCATION field.
    if ($2 !~ /^#FIELD LOCATION/)
        next

    # Split the LOCATION line on "=" and "&"; for the sample data ary[4]
    # is the text following "Value=".
    split($2, ary, "=|&")

    # Split the fourth line on "=" and the double quote; for a line of the
    # form MyField="X", ary1[3] is X.
    split($4, ary1, "=|\"")

    if (ary[4] != ary1[3])
        print $0 > "badrec.file"
}' RS="#END\n" ORS="#END\n" FS="\n" file
[jaypal:~/Temp] cat file
#START Descriptor # Good Record
#FIELD LOCATION="http://path.to/file/here&Value=BAR&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END
#START Descriptor # Bad Record
#FIELD LOCATION="http://path.to/file/here&Value=FOO&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END
#START Descriptor # Good Record
#FIELD LOCATION="http://path.to/file/here&Value=BAR&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END
[jaypal:~/Temp] gawk '
{
if ($2!~/^#FIELD LOCATION/)
{
next;
}
else
{
split($2,ary,"=|&");
split($4,ary1,"=|\"");
if(ary[4]!=ary1[3])
{
print $0 > "badrec.file"
}
}
}' RS="#END\n" ORS="#END\n" FS="\n" file
[jaypal:~/Temp] cat badrec.file
#START Descriptor # Bad Record
#FIELD LOCATION="http://path.to/file/here&Value=FOO&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END
#!/usr/bin/perl
use strict;
use warnings;

# Decide whether one "#START ... #END" record should be printed.
# Takes the whole record as a single multi-line string.
# Returns 1 when the record has both a LOCATION line and a MyField="..." line
# and the LOCATION line does not contain the MyField value; returns 0 otherwise
# (including when either line is missing).
sub _is_bad_record {
    my ($record) = @_;

    return 0 unless $record =~ /^#FIELD LOCATION/m;

    # Capture into a lexical right away; bail out if there is no MyField line
    # rather than relying on whatever $1 happens to hold.
    my ($wanted) = $record =~ /^#FIELD MyField="(.*)"$/m or return 0;

    # \Q...\E: the MyField value is data and must match literally.
    return $record =~ /^#FIELD LOCATION.*\Q$wanted\E/m ? 0 : 1;
}

# Read one whole record per iteration: records are terminated by "#END\n".
local $/ = "#END\n";

while (my $record = <DATA>) {
    print $record if _is_bad_record($record);
}
__DATA__
#START Descriptor
#FIELD LOCATION="http://path.to/file/here&Value=FOO&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END
#START Descriptor
#FIELD LOCATION=http://path.to/file/here&Value=BAR&OtherValue=BLAH"
#FIELD AnythingElse
#FIELD MyField="BAR"
#END