#!/usr/local/bin/perl use strict; our @tree = ("rss", ""); # Parsed RSS tree our $context = \@tree; # Context stack our @context = (); # Current context # Parse the RSS input file. This reads an RSS file from <>, and builds # a tree-like data structure representing it. For instance, # # Some Value # Another Value # # becomes an a tree of arrays of the form # ( name, attributes, @children ) # In this case: # ( [ "foo", # ' attr1="value1"', # [ # [ "bar", # ' attr2="value2"', # "Some Value" # ], # [ "baz", # "", # "Another Value" # ] # ] # ) # # @children is a mixed array of strings and arrays in the format above. # # Values can then be extracted with the &lookup() function. # # This code is fragile: it assumes that the input is good XML, but doesn't # attempt to validate it in any way. It assumes that each line either # begins or ends a container element, or contains an entire leaf element # (i.e., it's sensitive to line breaks). It'll probably break on all sorts # of trivial things, even with good XML. # # On the plus side, it doesn't require you to install umpteen Perl modules. $/ = ">"; while (<>) { s/^\s+//s; # Trim leading whitespace last if $_ eq ""; # End of file # See what kind of token this is. It might be: # 1) [text] # 2) [text] # 3) [text] # 4) # 5) # There might be text in front of the XML element. my $text; my $tag; m{^(.*)<(.*?)>$}s; $text = $1; $tag = $2; if ($text ne "") { push @{$context}, $text; } # See what kind of tag we've found if ($tag =~ m{^\?xml.*\?$}s) { # This is a # # or # # tag. Ignore it. next; } elsif ($tag =~ m{^\!}) { # This is a tag (maybe a comment). Ignore it. next; } elsif ($tag =~ m{ ^(/?) # Closing tag (\S+) # Tag name # Optional attributes ( # = "" (?:\s+ [\w:]+ # Attribute name = \"[^\"]*\" # Attribute value )* ) \s* (/?) # Optional closing slash, for singleton tag $ }xs) { # This is a regular , , or tag. my $closing = ($1 ne ""); # Is this a closing tag? my $name = $2; # Tag name my $attrs = $3; # Attribute assignments my $singleton = ($4 ne ""); # Is this a singleton tag? if ($closing) { # End of current context # Restore previous context from stack push @{$context[$#context]}, $context; $context = pop @context; next; } # This is either an opening tag or a singleton tag. In either # case, we need to start a new context # Save the old context on the context stack push @context, $context; # Start a new context, and switch to it $context = [ $name, $attrs ]; # If it's a singleton context, just append $context # to the parent context, and immediately pop back to # the parent context if ($singleton) { push @{$context[$#context]}, $context; $context = pop @context; } } else { print STDERR "Can't parse [$_]\n"; } } &dumptree(\@tree); # dumptree # Dump the tree, for debugging sub dumptree { my $tree = shift; # The tree or subtree to dump my $indent = shift; # Text to print at the beginning # of each line, for nested output if (!defined($indent)) { $indent = ""; } print "${indent}Name: [$tree->[0]]\n"; print "${indent}Attr: [$tree->[1]]\n"; for (my $i = 2; $i <= $#{$tree}; $i++) { my $child = $tree->[$i]; if (ref($child) eq "") { print "${indent} [$child]\n"; next; } if (ref($child) eq "ARRAY") { &dumptree($child, "${indent} "); next; } print STDERR "Unexpected node type for child $i: ", ref($child), "\n"; } print "${indent}End: [$tree->[0]]\n"; } # lookup($tree_ref, $node1, $node2, ... $nodeN) # Looks up a node in the tree $tree (as parsed by the big block at the # top). The $nodeN arguments comprise a pathname: # &lookup($tree_ref, "a", "b", "c"); # looks up the "c" element inside the "b" element inside the "a" element. # "a" must be the root of the tree. # # This function actually returns a list: &lookup($tree, "foo"); will # look up all of the "foo" elements inside $tree, and return them all # as an array. sub lookup { my $tree = shift; # Tree to look in my $name = shift; # First tag name to look for if ($tree->[0] ne $name) { # This node doesn't match what we're looking for return (); } # The name matches. Good. Do we need to look for a child tag? if (@_ == 0) { # We're at the end of the search. Return the current tree return ($tree); } my @retval = (); # Loop over children of this node for (my $i = 2; $i <= $#{$tree}; $i++) { my $child = $tree->[$i]; next if ref($child) eq ""; # Ignore strings push @retval, &lookup($child, @_); } return @retval; }