package getcode;
;######################################################################
;# getcode.pl
;#
;# 半角カナのコード系判別をめざして
;# jcode.pl から getcode を取り出して改変し作成した。
;#
;# Modified by gama <gama@mvg.biglobe.ne.jp>
;#
;# 以下の2点を改変した。
;# jis 半角カナの奇数個の連続を sjis と判定
;# sjis, euc の判定へ EUC半角カナ,ascii,sjis ank を加味
;#
;# 1998/03/17 v 0.01
;# jcode.pl v 2.6 から、getcode.pl v 0.01
;#
;; $rcsid = q$Id: getcode.pl,v 0.01 1998/03/17 gama Exp $;
;######################################################################
;#
;# INTERFACE:
;#
;# &getcode'getcode(*line)
;# Return 'jis', 'sjis', 'euc' or undef according to
;# Japanese character code in $line. Return 'binary' if
;# the data has non-character code.
;#
;######################################################################
;# 以下はオリジナルの jcode.pl v 2.6 の Copyright 表示です。
;#####################################################################
;#
;# jcode.pl: Perl library for Japanese character code conversion
;#
;# Copyright (c) 1995,1996,1997 Kazumasa Utashiro <utashiro@iij.ad.jp>
;# Internet Initiative Japan Inc.
;# 3-13 Kanda Nishiki-cho, Chiyoda-ku, Tokyo 101, Japan
;#
;# Copyright (c) 1992,1993,1994 Kazumasa Utashiro
;# Software Research Associates, Inc.
;#
;# Original version was developed under the name of srekcah@sra.co.jp
;# February 1992 and it was called kconv.pl at the beginning. This
;# address was a pen name for group of individuals and it is no longer
;# valid.
;#
;# Use and redistribution for ANY PURPOSE, without significant
;# modification, is granted as long as all copyright notices are
;# retained. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
;# ANY EXPRESS OR IMPLIED WARRANTIES ARE DISCLAIMED.
;#
;# $rcsid = q$Id: jcode.pl,v 2.6 1997/12/24 17:01:11 utashiro Exp $;
;######################################################################
&init unless defined $version;
;#
;# Initialize variables.
;#
sub init {
$version = $rcsid =~ /,v ([\d.]+)/ ? $1 : 'unkown';
$re_bin = '[\000-\006\177\377]';
$re_jis1978 = '\e\$\@';
$re_jis1983 = '\e\$B';
$re_jis1990 = '\e&\@\e\$B';
$re_jp = "$re_jis1978|$re_jis1983|$re_jis1990";
$re_asc = '\e\([BJ]';
$re_kana = '\e\(I';
$re_ascii = '[\007-\176]';
$re_odd_kana = '[\241-\337]([\241-\337][\241-\337])*';
$re_sjis_c = '[\201-\237\340-\374][\100-\176\200-\374]';
$re_sjis_ank = '[\007-\176\241-\337]';
$re_euc_c = '[\241-\376][\241-\376]';
$re_euc_kana = '\216[\241-\337]';
}
;#
;# Recognize character code.
;#
sub getcode {
local(*_) = @_;
local($matched, $code);
if (!/[\e\200-\377]/) { # not Japanese
$matched = 0;
$code = undef;
} # 'jis'
elsif (/$re_jp|$re_asc|$re_kana/o) {
$matched = 1;
$code = 'jis';
}
elsif (/$re_bin/o) { # 'binary'
$matched = 0;
$code = 'binary';
}
elsif (/(^|[\000-\177])$re_odd_kana($|[\000-\177])/go) {
# 'sjis' jis 半角カナの奇数個の連続
$matched = 1;
$code = 'sjis';
}
else { # should be 'euc' or 'sjis'
local($sjis, $euc);
$sjis += length($&) while /($re_sjis_c|$re_sjis_ank)+/go;
$euc += length($&) while /($re_euc_c|$re_euc_kana|$re_ascii)+/go;
$matched = &max($sjis, $euc);
$code = ('euc', undef, 'sjis')[($sjis<=>$euc) + $[ + 1];
}
wantarray ? ($matched, $code) : $code;
}
sub max { $_[ $[ + ($_[$[] < $_[$[+1]) ]; }
1;