# No.50 Perl Sample (code key - 78)
# Programs: compare words in two files
#           (compare word occurrence frequencies across multiple files)
# Tomonori Nagano
# Last Update: January 18, 2008
#
# This file is encoded in Unicode (UTF-8). If you see gibberish characters,
# please re-encode the file in utf-8.
#
# Usage: perl SCRIPT FILE [FILE ...]
#
# Output (on STDOUT):
#   1. One line per input file giving its total number of tokens.
#   2. A tab-separated table with one row per distinct token (in Perl's
#      default string sort order) and one count column per input file.
#
# Tokenization: each line is lower-cased (ASCII A-Z only), every character
# that is not a hyphen, a space, an ASCII letter or a digit is set off as a
# token of its own, and the line is then split on whitespace.

use strict ;
use warnings ;

my @files = @ARGV ;

# Per-file token totals, pre-set to 0 so that a file without any token is
# reported as 0 rather than as an undefined (empty) value.
my @token_total = (0) x scalar(@files) ;

# $count_in[$i]{$word} : occurrences of $word in the $i-th file.
my @count_in = map { +{} } @files ;

# $count_overall{$word} : occurrences of $word over all files; its keys
# define the rows of the final table.
my %count_overall ;

for my $i (0 .. $#files) {
    # Three-argument open with a lexical handle: the file name coming from
    # the command line is never interpreted as an open mode or a pipe.
    open(my $in, '<', $files[$i]) or die "cannot open $files[$i]: $!" ;

    while (my $line = <$in>) {
        chomp $line ;
        $line =~ tr/A-Z/a-z/ ;                      # normalize to lower case
        $line =~ s/([^\- a-zA-Z0-9])/ $1 /g ;       # a negative definition of punctuation

        # split ' ' splits on runs of whitespace and drops leading empties,
        # exactly like a bare "split" on $_.
        for my $word (split ' ', $line) {
            $token_total[$i]++ ;
            $count_in[$i]{$word}++ ;
            $count_overall{$word}++ ;
        }
    }

    close($in) or die "cannot close $files[$i]: $!" ;
}

# Summary: total tokens per file.
# NOTE(review): the label uses the 0-based index and has no space after
# "the", whereas the table header below is 1-based. The text is kept as-is
# so that the output format does not change.
for my $i (0 .. $#files) {
    print "num of words in the".$i."st/th file: $token_total[$i]\n" ;
}

# Table header: ID, Word, then one (1-based) column per file.
print "ID\tWord\t" ;
for my $file_number (1 .. scalar(@files)) {
    print $file_number."th file\t" ;
}
print "\n" ;
print "----------------------------------------------------------------------\n" ;

# Table body: one row per distinct token; a token absent from a file is
# printed as 0 in that file's column.
my $row_id = 1 ;
for my $word (sort keys %count_overall) {
    print "$row_id\t$word\t" ;
    for my $i (0 .. $#files) {
        my $count = exists $count_in[$i]{$word} ? $count_in[$i]{$word} : 0 ;
        print "$count\t" ;
    }
    print "\n" ;
    $row_id++ ;
}