package getcode; ;###################################################################### ;# getcode.pl ;# ;# 半角カナのコード系判別をめざして ;# jcode.pl から getcode を取り出して改変し作成した。 ;# ;# Modified by gama <gama@mvg.biglobe.ne.jp> ;# ;# 以下の2点を改変した。 ;# jis 半角カナの奇数個の連続を sjis と判定 ;# sjis, euc の判定へ EUC半角カナ,ascii,sjis ank を加味 ;# ;# 1998/03/17 v 0.01 ;# jcode.pl v 2.6 から、getcode.pl v 0.01 ;# ;; $rcsid = q$Id: getcode.pl,v 0.01 1998/03/17 gama Exp $; ;###################################################################### ;# ;# INTERFACE: ;# ;# &getcode'getcode(*line) ;# Return 'jis', 'sjis', 'euc' or undef according to ;# Japanese character code in $line. Return 'binary' if ;# the data has non-character code. ;# ;###################################################################### ;# 以下はオリジナルの jcode.pl v 2.6 の Copyright 表示です。 ;##################################################################### ;# ;# jcode.pl: Perl library for Japanese character code conversion ;# ;# Copyright (c) 1995,1996,1997 Kazumasa Utashiro <utashiro@iij.ad.jp> ;# Internet Initiative Japan Inc. ;# 3-13 Kanda Nishiki-cho, Chiyoda-ku, Tokyo 101, Japan ;# ;# Copyright (c) 1992,1993,1994 Kazumasa Utashiro ;# Software Research Associates, Inc. ;# ;# Original version was developed under the name of srekcah@sra.co.jp ;# February 1992 and it was called kconv.pl at the beginning. This ;# address was a pen name for group of individuals and it is no longer ;# valid. ;# ;# Use and redistribution for ANY PURPOSE, without significant ;# modification, is granted as long as all copyright notices are ;# retained. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ;# ANY EXPRESS OR IMPLIED WARRANTIES ARE DISCLAIMED. ;# ;# $rcsid = q$Id: jcode.pl,v 2.6 1997/12/24 17:01:11 utashiro Exp $; ;###################################################################### &init unless defined $version; ;# ;# Initialize variables. ;# sub init { $version = $rcsid =~ /,v ([\d.]+)/ ? $1 : 'unkown'; $re_bin = '[\000-\006\177\377]'; $re_jis1978 = '\e\$\@'; $re_jis1983 = '\e\$B'; $re_jis1990 = '\e&\@\e\$B'; $re_jp = "$re_jis1978|$re_jis1983|$re_jis1990"; $re_asc = '\e\([BJ]'; $re_kana = '\e\(I'; $re_ascii = '[\007-\176]'; $re_odd_kana = '[\241-\337]([\241-\337][\241-\337])*'; $re_sjis_c = '[\201-\237\340-\374][\100-\176\200-\374]'; $re_sjis_ank = '[\007-\176\241-\337]'; $re_euc_c = '[\241-\376][\241-\376]'; $re_euc_kana = '\216[\241-\337]'; } ;# ;# Recognize character code. ;# sub getcode { local(*_) = @_; local($matched, $code); if (!/[\e\200-\377]/) { # not Japanese $matched = 0; $code = undef; } # 'jis' elsif (/$re_jp|$re_asc|$re_kana/o) { $matched = 1; $code = 'jis'; } elsif (/$re_bin/o) { # 'binary' $matched = 0; $code = 'binary'; } elsif (/(^|[\000-\177])$re_odd_kana($|[\000-\177])/go) { # 'sjis' jis 半角カナの奇数個の連続 $matched = 1; $code = 'sjis'; } else { # should be 'euc' or 'sjis' local($sjis, $euc); $sjis += length($&) while /($re_sjis_c|$re_sjis_ank)+/go; $euc += length($&) while /($re_euc_c|$re_euc_kana|$re_ascii)+/go; $matched = &max($sjis, $euc); $code = ('euc', undef, 'sjis')[($sjis<=>$euc) + $[ + 1]; } wantarray ? ($matched, $code) : $code; } sub max { $_[ $[ + ($_[$[] < $_[$[+1]) ]; } 1;