daily pastebin goal
22%
SHARE
TWEET

Untitled

a guest Aug 20th, 2018 59 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/bin/env perl
  2. #
  3. # This is a simple script that "greps" an XML based on
  4. # the names of the xml elements. It prints the contents
  5. # of the Text data in that element
  6. #
  7. # Author: Hector Rivas
  8. #
  9. use XML::Parser;
  10. use Getopt::Std;
  11.  
  # ---- Shared state used by the parser callbacks ---------------------------
  my %Options;          # Command-line switches, filled in by getopts()
  my $print_content;    # Print or not this line (handle_end declares its own
                        # variable with this name, so this one is not read there)
  my $first = 1;        # If this is the first line or not (not referenced
                        # anywhere else in the visible code)

  # Three parallel stacks. handle_start pushes onto the front of each one, so
  # index 0 always belongs to the innermost element that is still open.
  @parent             = ();      # Names of the currently open elements
  @element_content    = ();      # Text collected so far for each open element
  @element_subcontent = ("");    # Output lines produced by each element's
                                 # children; the extra bottom entry ends up
                                 # holding the complete output of the run

  # Callbacks that XML::Parser invokes while it walks the document.
  my %handlers = (
      Start => \&handle_start,
      End   => \&handle_end,
      Char  => \&handle_char,
  );

  # Initialize the parser
  my $parser = XML::Parser->new( Handlers => \%handlers );
  25.  
  # Parse the options. An unrecognised switch shows the usage text and the
  # script ends with a failure status.
  getopts('q1etp', \%Options) or do {
      print_help();
      exit 1;
  };

  # The first remaining argument is the XML file to read. Without it there is
  # nothing to do, so show the usage text and stop.
  $filename = shift @ARGV;
  unless ($filename) {
      print_help();
      exit 0;
  }

  # Every further argument names an element to report. Only the keys of this
  # hash matter (lookups use "exists"); an empty hash means "report everything".
  my %element_filter;
  $element_filter{$_} = undef for @ARGV;

  # Run the parser; the handlers collect the output while it works.
  $parser->parsefile($filename);

  # Print the accumulated content
  print $element_subcontent[0];
  44.  
  # Print the usage message to the currently selected output handle.
  #
  # Takes no arguments. The text lists every switch accepted by the
  # getopts('q1etp') call, including -t, which was accepted but undocumented.
  # The sub is declared without a prototype because it is called before its
  # definition, where a prototype could not be checked anyway.
  sub print_help {
      my @usage_lines = (
          'Usage: grepxml [options] input.xml [Element1 ...]',
          '',
          'This program greps an XML printing the node names and the Text data.',
          'Options:',
          '    -1  Print the first node searched and exit.',
          '    -q  Print the data, not the node names.',
          '    -e  Print also elements with empty content',
          '    -t  Indent each line with one space per nesting level',
          '    -p  Print all the parents for each element (root.sub1.sub2.element val)',
          '',
      );

      # Every entry is one output line; the final empty entry leaves a blank
      # line after the option list.
      print map { "$_\n" } @usage_lines;
  }
  58.  
  # Return a copy of the given string with the whitespace at its start and at
  # its end removed. The caller's value is not modified.
  sub trim($)
  {
      my ($text) = @_;

      # A leading run is only tried at the very start, a trailing run only at
      # the very end; whitespace inside the string is kept.
      $text =~ s/^\s+|\s+$//g;

      return $text;
  }
  67.  
  # Start-tag callback: called with the parser object, the element name and
  # the attribute name/value pairs (only the element name is used).
  #
  # Opens a fresh slot for the element on each of the three stacks: empty text,
  # empty child output, and its name on the path stack.
  sub handle_start {
      my $tag = $_[1];

      # Both buffers start out empty for the new element.
      unshift @{$_}, "" for \@element_content, \@element_subcontent;

      unshift @parent, $tag;
  }
  77.  
  # Character-data callback: called with the parser object and a piece of text.
  #
  # The text is appended to the buffer of the innermost open element (the front
  # of @element_content), so text delivered in several pieces is joined.
  sub handle_char {
      my (undef, $text) = @_;

      $element_content[0] .= $text;
  }
  85.  
  86.  
  # End-tag callback: called with the parser object and the element name.
  #
  # Closes the innermost open element: pops its entries from the three stacks,
  # builds its output line when it is selected, and appends that line followed
  # by the lines of its children to the buffer of the enclosing element.
  #
  # Reads %element_filter (elements to report; empty means all) and %Options:
  #   -e  also report elements without text
  #   -t  indent the line by one space per enclosing open element
  #   -q  leave out the element name
  #   -p  use the dotted path (root.sub.element) instead of the bare name
  #   -1  print the first element named in the filter and exit with status 0
  sub handle_end {
      my ( $expat, $element ) = @_;

      # Dotted path from the root down to this element, then leave the element.
      my $element_path = join( ".", reverse @parent );
      shift @parent;

      # An element is named in the filter either by its bare name or by its path.
      my $is_filtered = ( exists $element_filter{$element}
              or exists $element_filter{$element_path} ) ? 1 : 0;

      # Without any filter every element is reported. This is a plain
      # assignment: a "my" combined with a statement modifier has undefined
      # behaviour in Perl.
      my $print_content = ( keys(%element_filter) == 0 or $is_filtered ) ? 1 : 0;

      # Text of this element and the lines already produced by its children.
      my $content    = trim( shift @element_content );
      my $subcontent = shift @element_subcontent;

      # Test the length rather than the truth of the text, so that an element
      # whose text is "0" is not mistaken for an empty one.
      my $has_text = ( defined $content and length $content ) ? 1 : 0;

      my $new_content = "";
      if ( $print_content and ( $has_text or $Options{'e'} ) ) {

          # After the shift above, @element_content holds one entry per
          # enclosing element that is still open, i.e. the nesting depth.
          $new_content .= " " x scalar(@element_content) if $Options{'t'};

          unless ( $Options{'q'} ) {
              $new_content .= ( $Options{'p'} ? $element_path : $element ) . " ";
          }
          $new_content .= "$content\n";

          # If option -1 is set and we are filtering for this element, print
          # only this line and exit.
          if ( $Options{'1'} and $is_filtered ) {
              print $new_content;
              exit 0;
          }
      }

      # End callbacks run for children before their parent, so the element's
      # own line goes in front of the children's lines to keep document order.
      $element_subcontent[0] .= $new_content . $subcontent;
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top