個人的なメモを記していくためのページです。
一行一単語を入力として'_'やcamel caseで区切られた単語のKWIC風見出しを出力するperlスクリプト
# camelKWIC.pl - print a KWIC-style (keyword in context) index of identifiers.
#
# Input : one identifier per line, read from the files named on the command
#         line or from standard input.
# Output: for every component of an identifier (the parts delimited by '_'
#         or by a camelCase boundary) that is longer than one character, a
#         "==== component ====" heading followed by every identifier that
#         contains the component.  The text before the component is
#         right-aligned in a 20-character field so the components line up.
use strict;
use warnings;

# Return the arguments ordered case-insensitively; ties are broken by a
# case-sensitive comparison so that "Test" and "test" keep a fixed order.
sub sort_caseless {
    my @items = @_;
    return sort { lc($a) cmp lc($b) or $a cmp $b } @items;
}

# Return the index keys of one identifier: its components, split at '_'
# (which is dropped) or between a lower-case letter/digit and an upper-case
# letter.  One-character components are not indexed.
sub index_keys {
    my ($word) = @_;
    return grep { length($_) > 1 }
        split /(?:_|(?<=[a-z0-9])(?=[A-Z]))/, $word;
}

# Split an identifier at the same boundaries as index_keys(), but with
# zero-width matches only, so every '_' survives as a piece of its own and
# joining the pieces reproduces the identifier.
sub display_pieces {
    my ($word) = @_;
    return split /(?:(?=_)|(?<=_)|(?<=[a-z0-9])(?=[A-Z]))/, $word;
}

# Build the complete KWIC listing for a list of identifiers.
# Duplicate identifiers are listed once.  Returns the output lines, each
# terminated by a newline.
sub kwic_lines {
    my @words = @_;

    my %is_key;       # component => 1 for every indexed component
    my %pieces_of;    # identifier => array ref of its display pieces
    for my $word (@words) {
        next if exists $pieces_of{$word};
        # Split every identifier exactly once instead of once per key.
        $pieces_of{$word} = [ display_pieces($word) ];
        $is_key{$_} = 1 for index_keys($word);
    }

    my @sorted_words = sort_caseless(keys %pieces_of);
    my @lines;
    for my $key (sort_caseless(keys %is_key)) {
        push @lines, "==== $key ====\n";
        for my $word (@sorted_words) {
            my @pieces = @{ $pieces_of{$word} };
            for my $pos (0 .. $#pieces) {
                # Plain string equality: keys come from the input and must
                # never be interpreted as regular expressions.
                next if $pieces[$pos] ne $key;
                push @lines, sprintf "%20s%s\n",
                    join('', @pieces[0 .. $pos - 1]),
                    join('', @pieces[$pos .. $#pieces]);
            }
        }
    }
    return @lines;
}

# Read the identifiers from ARGV/STDIN and print the listing.
sub main {
    my @words;
    while (my $line = <>) {
        # Strip only the line terminator (LF or CRLF); a final line without
        # a trailing newline keeps all of its characters.
        $line =~ s/\r?\n?\z//;
        push @words, $line;
    }
    print kwic_lines(@words);
    return;
}

# Run only when executed as a script, so the subs can be loaded for testing.
main() unless caller;

# Example:
$ cat foo
aSampleTest
Sample_test_word
word_Sample_test
test_test_test
$ perl camelKWIC.pl <foo
==== Sample ====
                   aSampleTest
                    Sample_test_word
               word_Sample_test
==== Test ====
             aSampleTest
==== test ====
             Sample_test_word
                    test_test_test
               test_test_test
          test_test_test
        word_Sample_test
==== word ====
        Sample_test_word
                    word_Sample_test