#!/usr/bin/perl -w
use XML::Simple;
use HTML::TreeBuilder;
# Count the words in the fields of every <entry> of each XML file named on
# the command line. Each field is parsed as HTML so that only its text is
# counted, not its markup; totals accumulate through elt_count_words().
for my $file (@ARGV) {
    # ForceArray keeps a lone <entry> as a one-element list, and the empty
    # KeyAttr stops XML::Simple from folding entries that carry an
    # id/name/key attribute into a hash. With the defaults either case makes
    # the array dereference below die.
    my $parse = XMLin($file, ForceArray => ['entry'], KeyAttr => []);

    for my $entry (@{ $parse->{entry} || [] }) {
        # An <entry> holding nothing but text arrives as a plain string.
        my @fields = ref $entry eq 'HASH' ? values %$entry : ($entry);

        for my $field (@fields) {
            # An element with attributes arrives as a hash whose text sits
            # under XML::Simple's default 'content' key.
            my $chunk = ref $field eq 'HASH' ? $field->{content} : $field;

            # Skip empty elements and nested structures: stringifying a
            # reference would count "HASH" and a memory address as words.
            next if !defined $chunk or ref $chunk;

            my $tree = HTML::TreeBuilder->new;
            $tree->parse($chunk);
            $tree->eof();
            elt_count_words($tree);
            $tree->delete();
        }
    }
}
# Tally the words found under $root into the package-global %freq table.
#
# $root is either a reference, which must provide content_list() and whose
# children are visited recursively, or a plain text string, whose words are
# counted case-insensitively. Returns nothing.
sub elt_count_words
{
    my ($root) = @_;
    our %freq;    # word => number of occurrences seen so far

    return unless defined $root;

    if (ref($root)) {
        for my $elt ($root->content_list()) {
            elt_count_words($elt);
        }
        return;
    }

    # A word is a run of word characters and apostrophes ("don't").
    for my $word (split /[^\w']+/, $root) {
        # Skip the empty leading field split() yields when the text starts
        # with a separator, and tokens made only of apostrophes. Testing for
        # a word character instead of plain truth keeps the word "0".
        next unless $word =~ /\w/;
        $freq{lc $word}++;
    }
    return;
}
# Return the keys of a word => count table ordered for reporting: highest
# count first, words with equal counts in alphabetical order so the result
# does not depend on hash key order.
sub words_by_frequency
{
    my ($count_of) = @_;
    return sort {
        $count_of->{$b} <=> $count_of->{$a}
            or $a cmp $b
    } keys %$count_of;
}

# Print every counted word with its frequency, most frequent first.
our %freq;
for my $word (words_by_frequency(\%freq)) {
    print "$word: $freq{$word}\n";
}