Guest User

Untitled

a guest
Aug 20th, 2018
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.57 KB | None | 0 0
  1. #!/bin/env perl
  2. #
  3. # This is a simple script that "greps" an XML based on
  4. # the names of the xml elements. It prints the contents
  5. # of the Text data in that element
  6. #
  7. # Author: Hector Rivas
  8. #
  9. use XML::Parser;
  10. use Getopt::Std;
  11.  
  use strict;
  use warnings;

  # Command-line switches filled in by getopts() below (q, 1, e, t, p).
  my %Options;

  # Parser state shared with the handle_* callbacks. Each array is used as a
  # stack whose top is element 0: handle_start unshifts, handle_end shifts.
  my @parent             = ();      # Names of the currently open elements
  my @element_content    = ();      # Text collected for each open element
  my @element_subcontent = ("");    # Output lines collected from the children;
                                    # the initial entry ends up holding the
                                    # complete output printed at the end

  # Initialize the parser
  my $parser = XML::Parser->new(
      Handlers => {
          Start => \&handle_start,
          End   => \&handle_end,
          Char  => \&handle_char,
      },
  );

  # Parse the options
  if ( not getopts( 'q1etp', \%Options ) ) {
      print_help();
      exit 1;
  }

  # Get the file name. Test for "missing" explicitly instead of by truthiness
  # so that a file literally named "0" is still accepted.
  my $filename = shift @ARGV;
  if ( not defined $filename or $filename eq q{} ) {
      print_help();
      exit 0;
  }

  # Elements to query. Every remaining argument becomes a key of this hash
  # (the values are irrelevant); an empty hash means "print every element".
  my %element_filter;
  @element_filter{@ARGV} = ();

  $parser->parsefile($filename);

  # Print the accumulated content
  print $element_subcontent[0];
  44.  
  # Print the command-line usage summary to standard output.
  #
  # Takes no arguments and returns nothing. Declared without a prototype: the
  # script calls this sub before its definition is compiled, so a prototype
  # would never be applied anyway.
  sub print_help {
      print join "\n",
          'Usage: grepxml [options] input.xml [Element1 ...]',
          '',
          'This program greps an XML printing the node names and the Text data.',
          'Options:',
          '-1 Print the first node searched and exit.',
          '-q Print the data, not the node names.',
          '-e Print also elements with empty content',
          '-p Print all the parents for each element (root.sub1.sub2.element val)',
          '-t Indent each line with one space per enclosing element',
          '',
          '';
      return;
  }
  58.  
  # Return a copy of a string with leading and trailing whitespace removed.
  #
  # Parameters:
  #   $string - the text to trim. The caller's variable is not modified.
  # Returns:
  #   The trimmed copy, or undef when $string is undef.
  #
  # Declared without a prototype on purpose: a ($) prototype does not validate
  # the argument, it only changes how calls are parsed (trim(@list) would trim
  # the element count instead of the first element).
  sub trim {
      my ($string) = @_;
      return $string unless defined $string;

      $string =~ s/^\s+//;
      $string =~ s/\s+$//;
      return $string;
  }
  67.  
  # XML::Parser Start handler, invoked when an element is opened.
  #
  # Arguments (as passed by the parser): the expat object, the element name
  # and the attribute name/value pairs. Only the element name is used.
  #
  # Opens a new level on the three stacks: an empty text accumulator, an empty
  # child-output accumulator, and the element name itself.
  sub handle_start {
      my ( undef, $tag_name ) = @_;

      # Start both accumulators for this element as empty strings
      unshift @$_, "" for \@element_content, \@element_subcontent;

      return unshift @parent, $tag_name;
  }
  77.  
  # XML::Parser Char handler, invoked for character data.
  #
  # Arguments (as passed by the parser): the expat object and a piece of text.
  # The text is appended to the accumulator on top of the content stack, so
  # text arriving in several pieces is joined back together.
  sub handle_char {
      my ( undef, $text ) = @_;

      return $element_content[0] = join "", $element_content[0], $text;
  }
  85.  
  86.  
  # XML::Parser End handler, invoked when an element is closed.
  #
  # Arguments (as passed by the parser): the expat object and the element name.
  #
  # Pops this element's level from the three stacks, formats its output line
  # if it is selected, and appends that line followed by the output collected
  # from its children to the enclosing level's accumulator.
  #
  # An element is selected when no filter was given, or when its name or its
  # dotted path (root.sub1.element) is a key of %element_filter. A selected
  # element is only printed when its trimmed text is non-empty or -e is set.
  #
  # With -1, the first printed element that explicitly matches the filter is
  # written to standard output and the program exits with status 0.
  sub handle_end {
      my ( $expat, $element ) = @_;

      # Build the dotted path while this element is still on the stack
      my $element_path = join ".", reverse @parent;
      shift @parent;

      # Does the filter name this element, by plain name or by full path?
      my $is_filtered = exists $element_filter{$element}
          || exists $element_filter{$element_path};

      # Plain assignment: "my $x = 1 if COND" has undefined behaviour in Perl
      my $print_content = ( keys(%element_filter) == 0 ) || $is_filtered;

      # Get the accumulated content
      my $content = trim( shift @element_content );
      # And the subelement content
      my $subcontent = shift @element_subcontent;

      # The new content
      my $new_content = "";

      # Test the length, not the truthiness, so that text such as "0" is
      # not mistaken for empty content
      if ( $print_content and ( length($content) or $Options{'e'} ) ) {

          # After the shift above, the stack holds one entry per ancestor
          if ( $Options{'t'} ) {
              $new_content .= " " x scalar @element_content;
          }
          if ( not $Options{'q'} ) {
              $new_content .= ( $Options{'p'} ? $element_path : $element ) . " ";
          }
          $new_content .= "$content\n";

          # If option -1 is set and we are filtering for this element, exit.
          if ( $Options{'1'} and $is_filtered ) {
              print $new_content;
              exit 0;
          }
      }

      # This element's line goes before the lines of its children
      $element_subcontent[0] .= $new_content . $subcontent;
      return;
  }
Add Comment
Please, Sign In to add comment