#!/usr/bin/perl
#
# Search-keyword decoding filter (plus extras) for the Japanese edition of
# AWStats.
#
# The per-search-engine handling is a modification contributed by Hobbit
# (http://homepage2.nifty.com/hobbit/html/awstats.html).
#
# The script reads lines from the files named on the command line (or from
# STDIN), picks a decoder based on which search engine the line mentions,
# decodes the percent-escaped keywords, and prints each kept line through
# ConvertToSJIS() followed by a newline.
#
#use lib('/home/webmaster/public_html/awstats/bin');
use strict;
use warnings;

use Encode;
use Jcode;
use Unicode::Japanese;

# Cache ID of the most recently printed cache hit, one per cache service.
# The initial values are dummies used only until the first real ID is seen.
my $GoogleLastCacheID   = "mnopqrstuvwx";    # Google cache
my $YahooLastCacheID    = "klmnopqrst";      # Yahoo cache
my $MySearchLastCacheID = "klmnopqrst";      # MySearch (myway.com) cache

LINE:
while (<>) {

    # Drop lines carrying these two signatures outright (they look like
    # exploit probe requests rather than real traffic).
    next LINE
        if /\\x90\\x02\\xb1\\x02\\xb1\\x02\\xb1\\x02\\xb1/
        || /NULL\.IDA\?CCCCCCCCCCCCCCCC/;

    # Access coming from the Google cache: turn it back into the form of an
    # ordinary Google search.
    if (   /http.*search\?q\=cache/
        || /216\.239\.(39\.104|41\.104|57\.104|59\.104|63\.104)/
        || /64\.233\.(161\.104|179\.104|167\.104|187\.104)/
        || /66\.102\.(7\.104|9\.104|11\.104)/ )
    {
        # Detect the cache ID to decide whether this line belongs to the
        # same cached page as the previous cache hit.  A line without a
        # recognisable ID is treated as having the empty ID.
        my ($cache_id) = /cache\:([^\:]+)\:/;
        $cache_id = '' unless defined $cache_id;

        if ( $cache_id ne $GoogleLastCacheID ) {

            # First appearance of this cache ID.
            $GoogleLastCacheID = $cache_id;
            s/cache.*?\+//;    # convert to ordinary Google arguments
            DecodeEncodedStringIMGOOUTF8();
            _print_as_sjis();
        }

        # Otherwise the ID was already seen and the line is dropped.  (An
        # image GET may occasionally arrive before the GET of the HTML page
        # it belongs to; that is tolerated.)
    }
    elsif (/http.*cache\.yahoofs/) {
        my ($cache_id) = /\&d\=(\w{10})/;
        $cache_id = '' unless defined $cache_id;

        if ( $cache_id ne $YahooLastCacheID ) {

            # First appearance of this cache ID.
            $YahooLastCacheID = $cache_id;

            # Forcibly synthesise the GET of the HTML file so that the
            # search words become detectable.
            s/(.*?\"GET )(.*)( HTTP.*?\?u\=)([^\/]*)(.*)(\&w\=.*)/$1$5$3$4$5$6/;
            DecodeEncodedStringYAHOOFSUTF8();
            DecodeEncodedStringYAHOO();
            _print_as_sjis();
        }
    }
    elsif (/http.*kd\.mysearch\.myway\.com\/jsp\/GGcres\.jsp/) {
        my ($cache_id) = /id\=(\w{12})/;
        $cache_id = '' unless defined $cache_id;

        if ( $cache_id ne $MySearchLastCacheID ) {

            # First appearance of this cache ID.
            $MySearchLastCacheID = $cache_id;

            # Forcibly synthesise the GET of the HTML file so that the
            # search words become detectable.
            s/(.*?\"GET )(.*)( HTTP.*?\?u\=)([^\/]*)(.*)(\&searchfor\=.*)/$1$5$3$4$5$6/;
            DecodeEncodedStringYAHOOFSUTF8();
            DecodeEncodedStringYAHOO();
            _print_as_sjis();
        }
    }
    elsif (/http.*kr\.search\.yahoo\./) {
        DecodeEncodedStringYAHOOkr();
        _print_as_sjis();
    }
    elsif (/http.*hk\.search\.yahoo\./) {
        DecodeEncodedStringYAHOOhk();
        _print_as_sjis();
    }
    elsif (/http.*search\.yahoo\.co\.jp/) {
        DecodeEncodedStringYAHOOjp();
        _print_as_sjis();
    }
    elsif ( /http.*search\.yahoo\./ || /216\.109\.125\.130/ ) {
        DecodeEncodedStringYAHOOUTF8();
        _print_as_sjis();
    }
    elsif (/http.*bsearch\.goo\.ne/) {
        DecodeEncodedStringUTF8();
        DecodeEncodedStringGOO();

        # Called twice on purpose, exactly as before.
        # NOTE(review): DecodeEncodedStringGOOUEMT is defined below but never
        # called -- confirm whether one of these two calls was meant to be it.
        DecodeEncodedStringGOOMT();
        DecodeEncodedStringGOOMT();
        _print_as_sjis();
    }
    elsif (/http.*images\.google\./) {
        DecodeEncodedStringUTF8();
        DecodeEncodedStringIMGOOUTF8();
        _print_as_sjis();
    }
    elsif (/http.*www\.google\./) {
        DecodeEncodedStringUTF8google();
        _print_as_sjis();
    }
    elsif (/http.*search\.msn\.com/) {
        DecodeEncodedStringMSNcomUTF8();
        DecodeEncodedStringMSN();
        _print_as_sjis();
    }
    elsif (/http.*search\.msn\./) {
        DecodeEncodedStringMSNUTF8();
        DecodeEncodedStringMSN();
        _print_as_sjis();
    }
    elsif (/http.*excite\.co\.jp/) {
        DecodeEncodedStringExcite();
        _print_as_sjis();
    }
    else {
        DecodeEncodedString();
        _print_as_sjis();
    }
}

# ----------------------------------------------------------------------
# Private helpers
# ----------------------------------------------------------------------

# Replace the current line ($_) with its ConvertToSJIS() form and print it
# followed by a newline.
sub _print_as_sjis {
    $_ = ConvertToSJIS($_);
    print;
    print "\n";
    return;
}

# Return a copy of $text with every %XX escape (either hex case) replaced
# by the byte it denotes.
sub _url_unescape {
    my ($text) = @_;
    $text =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack("H2", $1)/eg;
    return $text;
}

# Replace the first literal occurrence of $encoded in the current line ($_)
# with $decoded.  \Q...\E keeps characters taken from the log from being
# interpreted as regex syntax.
sub _substitute_literal {
    my ( $encoded, $decoded ) = @_;
    s/\Q$encoded\E/$decoded/;
    return;
}

# Shared body of the goo "MT" decoders: when $pattern matches the current
# line and its first capture contains a '%', unescape that capture, convert
# it from EUC to UTF-8 with Jcode and put it back into the line.
sub _decode_euc_param {
    my ($pattern) = @_;

    return unless /$pattern/;
    my $encoded = $1;
    return unless $encoded =~ /%/;

    my $decoded   = _url_unescape($encoded);
    my $converted = Jcode::convert( $decoded, 'utf8', 'euc' );
    _substitute_literal( $encoded, $converted );
    return;
}

# Unescape one Google query-parameter value and convert it to UTF-8.  The
# source encoding is chosen from the ie= / hl= markers found in the current
# line ($_); with no marker the unescaped bytes are returned unchanged.
sub _google_param_to_utf8 {
    my ($encoded) = @_;
    my $decoded = _url_unescape($encoded);

    if ( /ie\=Shift_JIS/ || /ie\=ShiftJIS/ || /ie\=sjis/i ) {
        return Jcode::convert( $decoded, 'utf8', 'sjis' );
    }
    if (/ie\=EUC-JP/i) {
        return Jcode::convert( $decoded, 'utf8', 'euc' );
    }
    if (/ie\=windows-1251/i) {
        Encode::from_to( $decoded, "windows-1251", 'utf8' );
        return $decoded;
    }
    if (/ie\=ISO-8859-2/i) {
        Encode::from_to( $decoded, "ISO-8859-2", 'utf8' );
        return $decoded;
    }
    if (/ie\=ISO-8859-1/i) {
        Encode::from_to( $decoded, "ISO-8859-1", 'utf8' );
        return $decoded;
    }
    if (/ie\=UTF-8/i) {
        return $decoded;
    }
    if (/hl\=ja/) {

        # No explicit input encoding: let Jcode guess, and leave the bytes
        # alone when it cannot tell.
        my $code = Jcode::getcode($decoded);
        return $decoded if !defined $code || $code eq "";
        return Jcode::convert( $decoded, 'utf8' );
    }
    return $decoded;
}

# ----------------------------------------------------------------------
# Converters / decoders.  Every DecodeEncodedString* sub works in place on
# the current line in $_ and returns nothing.
# ----------------------------------------------------------------------

# Return $text re-encoded as Shift_JIS via Unicode::Japanese.  When called
# without an argument the current line ($_) is used.
sub ConvertToSJIS {
    my $text = @_ ? $_[0] : $_;
    return Unicode::Japanese->new($text)->sjis;
}

# Excite: unescape the whole line, then convert it from Shift_JIS to UTF-8.
sub DecodeEncodedStringExcite {
    chomp;
    $_ = _url_unescape($_);
    Jcode::convert( \$_, 'utf8', 'sjis' );
    return;
}

# Default decoder: unescape the whole line, let Jcode auto-detect the
# encoding and convert to UTF-8, then replace each "\xa1\xa1" byte pair
# with a plain space.
sub DecodeEncodedString {
    chomp;
    s/\\x/%/g;    # handles Google requests made with "lr=lang_ja"
    $_ = _url_unescape($_);
    Jcode::convert( \$_, 'utf8' );
    s/([\x80-\xff][\x80-\xff]|[\x00-\x7f])/($1 eq "\xa1\xa1") ? " " : $1/eg;
    return;
}

# Unescape the whole line and convert it to UTF-8 (encoding auto-detected
# by Jcode).
sub DecodeEncodedStringUTF8 {
    chomp;
    $_ = _url_unescape($_);
    Jcode::convert( \$_, 'utf8' );
    return;
}

# Google images / Google cache: unescape the whole line, then convert to
# UTF-8 only when the line names its input encoding as Shift_JIS or EUC-JP.
sub DecodeEncodedStringIMGOOUTF8 {
    chomp;
    $_ = _url_unescape($_);
    if (/ie\=Shift_JIS/i) {
        Jcode::convert( \$_, 'utf8', 'sjis' );
    }
    elsif (/ie\=EUC-JP/i) {
        Jcode::convert( \$_, 'utf8', 'euc' );
    }
    return;
}

# Yahoo (generic): unescape the whole line; unless the line is marked
# ei=UTF-8, convert it to UTF-8 (encoding auto-detected by Jcode).
sub DecodeEncodedStringYAHOOUTF8 {
    chomp;
    $_ = _url_unescape($_);
    Jcode::convert( \$_, 'utf8' ) unless /ei\=UTF-8/;
    return;
}

# Yahoo Hong Kong: unescape the whole line; convert from Big5 to UTF-8
# when the line is marked ei=BIG5.
sub DecodeEncodedStringYAHOOhk {
    chomp;
    $_ = _url_unescape($_);
    if (/ei\=BIG5/) {
        Encode::from_to( $_, "big5", "utf8" );
    }
    return;
}

# Yahoo Korea: unescape the whole line and convert from EUC-KR to UTF-8.
sub DecodeEncodedStringYAHOOkr {
    chomp;
    $_ = _url_unescape($_);
    Encode::from_to( $_, "euc-kr", "utf8" );
    return;
}

# Yahoo Japan: turn "\x" escapes into "%" escapes, unescape the whole line
# and convert it to UTF-8 (encoding auto-detected by Jcode).
sub DecodeEncodedStringYAHOOjp {
    s/\\x/%/g;
    chomp;
    $_ = _url_unescape($_);
    Jcode::convert( \$_, "utf8" );
    return;
}

# Yahoo / MySearch cache: unescape the whole line; no charset conversion.
sub DecodeEncodedStringYAHOOFSUTF8 {
    chomp;
    $_ = _url_unescape($_);
    return;
}

# Google web search: decode the value that follows "q=" and then the value
# that follows "G=", each converted to UTF-8 according to the ie= / hl=
# markers in the line, replacing the escaped text in place.
sub DecodeEncodedStringUTF8google {
    chomp;
    s/\\x/%/g;    # handles Google requests made with "lr=lang_ja"

    if (/q\=([^\&]+)/) {
        my $encoded = $1;
        _substitute_literal( $encoded, _google_param_to_utf8($encoded) );
    }
    if (/G\=([^\&]+)/) {
        my $encoded = $1;
        _substitute_literal( $encoded, _google_param_to_utf8($encoded) );
    }
    return;
}

# Yahoo cache: decode the value that follows "p=".  It is taken as UTF-8
# when the line is marked ei=UTF-8 and converted from EUC otherwise.
sub DecodeEncodedStringYAHOO {
    chomp;
    if (/p\=([^\&]+)/) {
        my $encoded = $1;
        my $decoded = _url_unescape($encoded);
        my $converted =
            /ei\=UTF-8/
            ? $decoded
            : Jcode::convert( $decoded, 'utf8', 'euc' );
        _substitute_literal( $encoded, $converted );
    }
    return;
}

# goo: decode the "&MT=" value (EUC to UTF-8) when it contains escapes.
sub DecodeEncodedStringGOO {
    chomp;
    _decode_euc_param(qr/\&MT\=([^\&]+)/);
    return;
}

# goo: decode a "UEMT=" value that starts with an escape.  The value is
# unescaped and converted from EUC to UTF-8 twice in a row.
sub DecodeEncodedStringGOOUEMT {
    chomp;
    if (/UEMT\=(%[^\&]+)/) {
        my $encoded = $1;
        if ( $encoded =~ /%/ ) {
            my $decoded   = _url_unescape($encoded);
            my $converted = Jcode::convert( $decoded, 'utf8', 'euc' );
            $converted = _url_unescape($converted);
            $converted = Jcode::convert( $converted, 'utf8', 'euc' );
            _substitute_literal( $encoded, $converted );
        }
    }
    return;
}

# goo: decode an "MT=" value that starts with an escape (EUC to UTF-8).
sub DecodeEncodedStringGOOMT {
    chomp;
    _decode_euc_param(qr/MT\=(%[^\&]+)/);
    return;
}

# search.msn.com: unescape the whole line and convert it from Shift_JIS to
# UTF-8.
sub DecodeEncodedStringMSNcomUTF8 {
    chomp;
    $_ = _url_unescape($_);
    Jcode::convert( \$_, 'utf8', 'sjis' );
    return;
}

# Other search.msn.* hosts: unescape the whole line; convert from Shift_JIS
# to UTF-8 only when the line is marked cp=932.
sub DecodeEncodedStringMSNUTF8 {
    chomp;
    $_ = _url_unescape($_);
    if (/cp\=932/) {
        Jcode::convert( \$_, 'utf8', 'sjis' );
    }
    return;
}

# MSN: unescape the value that follows "aq=" once more when it still
# contains escapes; no charset conversion is applied.
sub DecodeEncodedStringMSN {
    chomp;
    if (/aq\=([^\&]+)/) {
        my $encoded = $1;
        if ( $encoded =~ /%/ ) {
            _substitute_literal( $encoded, _url_unescape($encoded) );
        }
    }
    return;
}