# No.29 Perl Sample (code key - 41)
# Learning Perl: Count Word Types 1 (count the number of word types)
# Tomonori Nagano
# Last Update: January 18, 2008
#
# This file is encoded in Unicode (UTF-8). If you see gibberish characters,
# please re-encode the file in utf-8.
#
# This sample script is simplified for pedagogical purposes. Those who are
# interested in advanced Perl programming are encouraged to consult the
# relevant sections of "Perl Cookbook" by Christiansen & Torkington.
#
# Usage: perl thisfile.pl [file ...]
# Reads the named files (or standard input when none are given) and prints
# a single line "numTypes<TAB><count>", where <count> is the number of
# distinct purely alphabetic words (case-insensitive) in the input.

use strict;
use warnings;

# Count the word types in one line of text that have not been seen before.
#
#   $line - one line of input; a trailing end-of-line character is allowed.
#   $seen - reference to a hash keyed by the lower-cased words already
#           counted; it is updated in place with the words of this line.
#
# Returns the number of word types that were new to %$seen.
sub _count_new_types {
    my ($line, $seen) = @_;

    chomp $line;           # delete the end-of-line character
    $line = lc $line;      # fold case so "The" and "the" are one type

    # A negative definition of punctuation: anything that is not a hyphen,
    # a space, or an alphanumeric gets padded with spaces so that it splits
    # off as a token of its own ("dog." becomes "dog" and ".").
    $line =~ s/([^\- a-zA-Z0-9])/ $1 /g;

    my $new_types = 0;
    for my $word (split ' ', $line) {
        # Skip tokens containing anything but letters: punctuation, numbers,
        # and hyphenated forms such as "well-known".
        next if $word =~ /[^a-zA-Z]/;

        # Post-increment yields the old count, so this is true only for
        # words that were already recorded.
        next if $seen->{$word}++;

        $new_types++;
    }

    return $new_types;
}

# Read every input line via <> (files named in @ARGV, else STDIN), count the
# word types, and print the total in the form "numTypes\t<count>\n".
sub main {
    my %seen;
    my $num_types = 0;

    while (my $line = <>) {
        $num_types += _count_new_types($line, \%seen);
    }

    print "numTypes\t$num_types\n";
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require (e.g. from a test) without consuming input.
main() unless caller;