#!/usr/bin/perl -w
#
# Word-frequency counter for XML files whose entries hold HTML fragments.
#
# Usage: <this script> FILE.xml [FILE.xml ...]
#
# Each file named on the command line is parsed with XML::Simple.  Every
# plain-string field of every <entry> element is parsed as HTML with
# HTML::TreeBuilder, and the words found in its text nodes are tallied
# case-insensitively.  Once all files are processed the tally is printed as
# "word: count" lines, most frequent word first.

use strict;
use warnings;

use XML::Simple;
use HTML::TreeBuilder;

# Running tally over all input files: lowercased word => occurrence count.
# Declared before elt_count_words() so the sub can see it under strict.
my %freq;

for my $file (@ARGV) {
    # ForceArray keeps {entry} an array ref even when the file contains a
    # single <entry>; KeyAttr => [] stops XML::Simple from folding entries
    # that carry an id/name/key attribute into a hash keyed on that value.
    # Either default would make the array dereference below die.
    my $parse = XMLin($file, ForceArray => ['entry'], KeyAttr => []);

    # A file without any <entry> contributes nothing instead of failing.
    for my $entry (@{ $parse->{entry} || [] }) {
        # An entry that is bare text (no attributes/children) has no fields.
        next unless ref $entry eq 'HASH';

        for my $chunk (values %$entry) {
            # Only plain strings are HTML fragments.  Nested structures
            # (hash/array refs from child elements) are skipped rather than
            # stringified to "HASH(0x...)" and counted as words.
            next if !defined $chunk || ref $chunk;

            my $tree = HTML::TreeBuilder->new;
            $tree->parse($chunk);
            $tree->eof();
            elt_count_words($tree);
            $tree->delete();    # release the parse tree explicitly
        }
    }
}

# elt_count_words($root)
#
# Recursively tally the words below $root into %freq.
#
#   $root - either an object that provides content_list() (here: the
#           HTML::TreeBuilder tree or one of its content items) or a plain
#           text string.
#
# Objects are descended into via content_list(); strings are split on runs
# of characters that are neither word characters nor apostrophes, and each
# resulting word is counted in lowercase.  Returns nothing.
sub elt_count_words {
    my ($root) = @_;
    return unless defined $root;

    if (ref $root) {
        for my $child ($root->content_list()) {
            elt_count_words($child);
        }
        return;
    }

    for my $word (split /[^\w']+/, $root) {
        # split yields an empty leading field when the text starts with a
        # separator.  Compare against '' rather than testing truthiness so
        # that the token "0" is still counted.
        next if $word eq '';
        $freq{ lc $word }++;
    }
    return;
}

# Report: highest count first; words with equal counts are listed
# alphabetically so the output is stable between runs.
for my $word (sort { $freq{$b} <=> $freq{$a} or $a cmp $b } keys %freq) {
    print "$word: $freq{$word}\n";
}