#!/usr/bin/perl -w
# uniqueXPaths.pl reports all the unique element paths and their
# frequencies of the XML file supplied as the only mandatory
# argument on the command line.
#
# Copyright 2003, Ramiro Gómez.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
use strict;
use XML::Parser;
use utf8; # Unicode support (not needed with Perl 5.8)
# Require the XML file name. The trailing newline stops Perl from
# appending "at <script> line N." to this user-facing usage message.
die "Usage: $0 XMLfile\n" unless @ARGV;
my $xmlfile = shift;
my %paths; # Hash that stores XPaths and their frequencies

# Create the parser object. Only start-tag events are handled: every
# opening tag accounts for exactly one occurrence of an element path.
# (Direct method call instead of the ambiguous indirect "new Class" form.)
my $parser = XML::Parser->new(
    'Handlers' => {
        'Start' => \&h_start,
    },
);

# Parse the file; h_start fills %paths while the document is read.
$parser->parsefile($xmlfile);

# Report variables. They are file-scoped lexicals declared before the
# formats because the format argument lines below refer to them by name.
my ($path, $freq);

# Define report format: page header ($% is the current page number) ...
format STDOUT_TOP =
Element Paths Page: @>>>>>
$%
Frequency Path
------------------------------------------------------------------
.
# ... and one detail line per path: right-justified count, then the path.
format STDOUT =
@>>>>>>>> @*
$freq, $path
.

# Print the report in sorted path order. A plain loop is used rather than
# map because only the side effect of write is wanted. The values are
# copied into $path/$freq (instead of using them as loop variables) so the
# formats see the very variables they were compiled against.
for my $xpath (sort keys %paths) {
    $path = $xpath;
    $freq = $paths{$xpath};
    write;
}
# Start-tag handler: counts one occurrence of the opened element's path.
#   $expat   - expat parser object handed in by XML::Parser
#   $element - name of the element whose start tag was just seen
# The path consists of the names returned by context() (the elements that
# are currently open) followed by $element, each one preceded by '/'.
sub h_start {
    my ($expat, $element) = @_;
    # The leading empty string makes join() produce the initial '/', so a
    # root element (empty context) yields "/$element" with no special case.
    my $xpath = join '/', '', $expat->context(), $element;
    return $paths{$xpath}++;
}
# uniqueXPaths.pl
# By ramiro - Posted on November 3rd, 2004
# Post new comment