# No.21 Perl Sample (code key - 56)
# Learning Perl: count bigrams 1 (counting the number of bigrams)
# Tomonori Nagano
# Last Update: January 18, 2008
#
# This file is encoded in Unicode (UTF-8). If you see gibberish characters,
# please re-encode the file in UTF-8.
#
# This sample script is simplified for pedagogical purposes. Those who are
# interested in advanced Perl programming are encouraged to consult the
# relevant sections of "Perl Cookbook" by Christiansen & Torkington.
#
# Input:  the files named on the command line, or STDIN when none are given
#         (this is what the <> operator reads).
# Output: two tab-separated lines on STDOUT:
#             numWords    <number of tokens read>
#             numBigrams  <number of consecutive token pairs counted>

use strict;
use warnings;

my @words;              # every token of the input, in reading order
my $numWords   = 0;     # total number of tokens
my $numBigrams = 0;     # total number of bigrams

# Read the input line by line and tokenize it.
while ( my $line = <> ) {
    $line =~ tr/A-Z/a-z/;               # normalize ASCII letters to lower case

    # A negative definition of punctuation: any character that is not a
    # hyphen, a space, a letter, or a digit is surrounded by spaces so that
    # it becomes a token of its own.  ($1, not \1, is the correct way to
    # refer to a capture group on the replacement side of s///.)
    $line =~ s/([^\- a-zA-Z0-9])/ $1 /g;

    # split " " splits on runs of whitespace and drops leading whitespace,
    # so the newline and the padding added above never yield empty tokens.
    push @words, split( " ", $line );
}

# The word count only needs to be taken once, after all input is read.
$numWords = scalar @words;

# Append an empty string as a document-final marker, so that the last real
# word is also the first member of a bigram.  As a result numBigrams equals
# numWords (and both are 0 for empty input).
push @words, '';

# Visit every pair of consecutive tokens.  The range is empty when @words
# holds only the final marker.
for my $i ( 0 .. $#words - 1 ) {
    # The bigram itself: two consecutive tokens joined by a space.  It is
    # built here only to show what is being counted; this version of the
    # script does not store or print it.
    my $bigram = "$words[$i] $words[$i+1]";

    $numBigrams++;                      # increase the count every time
}

print "numWords\t$numWords\n";
print "numBigrams\t$numBigrams\n";