# No.30 Perl Sample (code key - 42)
# Learning Perl: Count Word Types 2 (count the number of word types)
# Tomonori Nagano
# Last Update: January 18, 2008
#
# This file is encoded in Unicode (UTF-8). If you see gibberish characters,
# please re-encode the file in utf-8.
#
# This sample script is simplified for pedagogical purposes. Those who are
# interested in advanced Perl programming are encouraged to consult the
# relevant sections of "Perl Cookbook" by Christiansen & Torkington.
#
# Usage: perl this_script.pl [FILE ...]
# Reads the named files (or standard input when none are given), counts how
# often each purely alphabetic word occurs, and prints the number of distinct
# words (types) followed by the word/frequency table in four orders.

use strict;
use warnings;

# tokenize($line) -> list of words
#
# Lower-cases one line of text and returns its purely alphabetic tokens.
# Every character other than a hyphen, a space, an ASCII letter or a digit is
# padded with spaces so that it becomes a token of its own; any token that
# still contains a non-letter (digits, punctuation, hyphenated words) is then
# dropped.
sub tokenize {
    my ($line) = @_;

    chomp $line;                                # delete the end-of-line character
    $line = lc $line;                           # change to lower case
    $line =~ s/([^\- a-zA-Z0-9])/ $1 /g;        # a negative definition of punctuation

    # split ' ' splits on runs of whitespace and never yields empty tokens.
    return grep { !/[^a-zA-Z]/ } split ' ', $line;
}

# print_section($title, \%freq_of, @words)
#
# Prints a banner line containing $title, then one "word<TAB>frequency" line
# for each word in @words, in the order given.
sub print_section {
    my ($title, $freq_of, @words) = @_;

    print "----------------$title----------------\n";
    print "$_\t$freq_of->{$_}\n" for @words;

    return;
}

# main()
#
# Reads the input line by line through the diamond operator, builds the
# word => frequency hash, and prints the report to the selected output handle.
sub main {
    my %freq_of;

    while (my $line = <>) {
        # ++ on a missing key creates it with a value of 1.
        $freq_of{$_}++ for tokenize($line);
    }

    # keys in scalar context gives the number of distinct words (types).
    my $num_types = keys %freq_of;
    print "numTypes\t$num_types\n\n";

    my @alphabetical = sort keys %freq_of;

    # Equal frequencies fall back to alphabetical order so the output does
    # not depend on hash ordering.
    my @by_freq_ascending =
        sort { $freq_of{$a} <=> $freq_of{$b} or $a cmp $b } keys %freq_of;
    my @by_freq_descending =
        sort { $freq_of{$b} <=> $freq_of{$a} or $a cmp $b } keys %freq_of;

    print_section('alphabetical ascending order',  \%freq_of, @alphabetical);
    print_section('alphabetical descending order', \%freq_of, reverse @alphabetical);
    print_section('numerical ascending order',     \%freq_of, @by_freq_ascending);
    print_section('numerical descending order',    \%freq_of, @by_freq_descending);

    return;
}

# Run the report only when executed as a script; when this file is loaded
# with require/do, only the subroutines are defined.
main() unless caller;