#!/usr/local/bin/perl
use strict;
our @tree = ("rss", ""); # Parsed RSS tree
our $context = \@tree; # Context stack
our @context = (); # Current context
# Parse the RSS input file. This reads an RSS file from <>, and builds
# a tree-like data structure representing it. For instance,
#
# Some Value
# Another Value
#
# becomes an a tree of arrays of the form
# ( name, attributes, @children )
# In this case:
# ( [ "foo",
# ' attr1="value1"',
# [
# [ "bar",
# ' attr2="value2"',
# "Some Value"
# ],
# [ "baz",
# "",
# "Another Value"
# ]
# ]
# )
#
# @children is a mixed array of strings and arrays in the format above.
#
# Values can then be extracted with the &lookup() function.
#
# This code is fragile: it assumes that the input is good XML, but doesn't
# attempt to validate it in any way. It assumes that each line either
# begins or ends a container element, or contains an entire leaf element
# (i.e., it's sensitive to line breaks). It'll probably break on all sorts
# of trivial things, even with good XML.
#
# On the plus side, it doesn't require you to install umpteen Perl modules.
$/ = ">";
while (<>)
{
s/^\s+//s; # Trim leading whitespace
last if $_ eq ""; # End of file
# See what kind of token this is. It might be:
# 1) [text]
# 2) [text]
# 3) [text]
# 4)
# 5)
# There might be text in front of the XML element.
my $text;
my $tag;
m{^(.*)<(.*?)>$}s;
$text = $1;
$tag = $2;
if ($text ne "")
{
push @{$context}, $text;
}
# See what kind of tag we've found
if ($tag =~ m{^\?xml.*\?$}s)
{
# This is a
#
# or
#
# tag. Ignore it.
next;
} elsif ($tag =~ m{^\!})
{
# This is a tag (maybe a comment). Ignore it.
next;
} elsif ($tag =~ m{
^(/?) # Closing tag
(\S+) # Tag name
# Optional attributes
(
# = ""
(?:\s+
[\w:]+ # Attribute name
=
\"[^\"]*\" # Attribute value
)*
)
\s*
(/?) # Optional closing slash, for singleton tag
$
}xs)
{
# This is a regular , , or tag.
my $closing = ($1 ne ""); # Is this a closing tag?
my $name = $2; # Tag name
my $attrs = $3; # Attribute assignments
my $singleton = ($4 ne ""); # Is this a singleton tag?
if ($closing)
{
# End of current context
# Restore previous context from stack
push @{$context[$#context]}, $context;
$context = pop @context;
next;
}
# This is either an opening tag or a singleton tag. In either
# case, we need to start a new context
# Save the old context on the context stack
push @context, $context;
# Start a new context, and switch to it
$context = [ $name, $attrs ];
# If it's a singleton context, just append $context
# to the parent context, and immediately pop back to
# the parent context
if ($singleton)
{
push @{$context[$#context]}, $context;
$context = pop @context;
}
} else {
print STDERR "Can't parse [$_]\n";
}
}
&dumptree(\@tree);
# dumptree
# Dump the tree, for debugging
sub dumptree
{
my $tree = shift; # The tree or subtree to dump
my $indent = shift; # Text to print at the beginning
# of each line, for nested output
if (!defined($indent))
{
$indent = "";
}
print "${indent}Name: [$tree->[0]]\n";
print "${indent}Attr: [$tree->[1]]\n";
for (my $i = 2; $i <= $#{$tree}; $i++)
{
my $child = $tree->[$i];
if (ref($child) eq "")
{
print "${indent} [$child]\n";
next;
}
if (ref($child) eq "ARRAY")
{
&dumptree($child, "${indent} ");
next;
}
print STDERR "Unexpected node type for child $i: ",
ref($child),
"\n";
}
print "${indent}End: [$tree->[0]]\n";
}
# lookup($tree_ref, $node1, $node2, ... $nodeN)
# Looks up a node in the tree $tree (as parsed by the big block at the
# top). The $nodeN arguments comprise a pathname:
# &lookup($tree_ref, "a", "b", "c");
# looks up the "c" element inside the "b" element inside the "a" element.
# "a" must be the root of the tree.
#
# This function actually returns a list: &lookup($tree, "foo"); will
# look up all of the "foo" elements inside $tree, and return them all
# as an array.
sub lookup
{
my $tree = shift; # Tree to look in
my $name = shift; # First tag name to look for
if ($tree->[0] ne $name)
{
# This node doesn't match what we're looking for
return ();
}
# The name matches. Good. Do we need to look for a child tag?
if (@_ == 0)
{
# We're at the end of the search. Return the current tree
return ($tree);
}
my @retval = ();
# Loop over children of this node
for (my $i = 2; $i <= $#{$tree}; $i++)
{
my $child = $tree->[$i];
next if ref($child) eq ""; # Ignore strings
push @retval, &lookup($child, @_);
}
return @retval;
}