# No.32 Perl Sample (code key - 63)
# Learning Perl: Count Word Types 4
#   (count the number of word types and their probabilities)
# Tomonori Nagano
# Last Update: January 18, 2008
#
# This file is encoded in Unicode (UTF-8). If you see gibberish characters,
# please re-encode the file in UTF-8.
#
# This sample script is simplified for pedagogical purposes. Those who are
# interested in advanced Perl programming are encouraged to consult the
# relevant sections of "Perl Cookbook" by Christiansen & Torkington.
#
# Input : the files named on the command line, or STDIN when none are given
#         (the standard behaviour of the <> operator).
# Output: one line per word type on STDOUT, most frequent first:
#             word <TAB> frequency <TAB> frequency / total word count

use strict;
use warnings;

# Count the purely alphabetic words of one input line.
#
#   $line        - a line of text (the caller's variable is not modified)
#   $frequency_of - hash reference; $frequency_of->{$word} is incremented
#                   for every counted word
#
# Returns the number of words counted on this line.
sub count_words_in_line {
    my ($line, $frequency_of) = @_;

    # Normalize to lower case (ASCII letters only, exactly as tr/// maps them).
    $line =~ tr/A-Z/a-z/;

    # A negative definition of punctuation: every character that is not a
    # hyphen, a space, a letter or a digit is padded with spaces so that it
    # becomes a token of its own. Upper-case letters cannot occur here
    # because the line was lower-cased above.
    $line =~ s/([^\- a-z0-9])/ $1 /g;

    my $counted = 0;

    # split ' ' splits on runs of whitespace and drops leading empty fields.
    for my $word (split ' ', $line) {

        # Skip tokens containing any non-alphabetic character (punctuation,
        # numbers, hyphenated words).
        next if $word =~ /[^a-z]/;

        $frequency_of->{$word}++;    # increase the type frequency
        $counted++;                  # increase the token count every time
    }

    return $counted;
}

# Build the report lines for the collected frequencies.
#
#   $frequency_of - hash reference mapping word => frequency
#   $num_words    - total number of counted words (the divisor)
#
# Returns a list of "word\tfrequency\tprobability\n" strings, sorted by
# descending frequency. Words with equal frequency are ordered
# alphabetically so that the output is the same on every run. An empty hash
# yields an empty list, so no division takes place when nothing was counted.
sub report_lines {
    my ($frequency_of, $num_words) = @_;

    my @words_by_frequency = sort {
        $frequency_of->{$b} <=> $frequency_of->{$a}
            or $a cmp $b
    } keys %{$frequency_of};

    return map {
        "$_\t$frequency_of->{$_}\t" . $frequency_of->{$_} / $num_words . "\n"
    } @words_by_frequency;
}

# Read the input line by line, count the words, and print the report.
sub main {
    my %frequency_of;
    my $num_words = 0;

    while (my $line = <>) {
        $num_words += count_words_in_line($line, \%frequency_of);
    }

    print report_lines(\%frequency_of, $num_words);

    return;
}

# Run only when executed as a script, not when loaded by another file.
main() unless caller;

1;