package Encode::CP932Family; use strict; use Encode qw(find_encoding); use base qw(Encode::Encoding); use Encode::CJKConstants qw(%RE); require 'jcode.pl'; for my $name ('cp51932', 'cp50220', 'cp50221') { my $h2z = ($name eq 'cp50220') ? 'z' : undef; my $jcode = ($name eq 'cp51932') ? 'euc' : 'jis'; $Encode::Encoding{$name} = bless { Name => $name, h2z => $h2z, jcode => $jcode, } => __PACKAGE__; } my $cp932 = find_encoding('cp932'); sub needs_lines { my $obj = shift; ($obj->{Name} eq 'cp51932') ? 0 : 1; } sub decode($$;$) { local ${^ENCODING}; my ($obj, $octet, $check) = @_; my $str = $cp932->decode(jcode::to('sjis', $octet, $obj->{jcode})); $_[1] = '' if $check; return $str; } sub encode($$;$) { my ($obj, $str, $check) = @_; my $octet = jcode::to($obj->{jcode}, ibm2nec($cp932->encode($str)), 'sjis', $obj->{h2z}); $_[1] = '' if $check; return $octet; } # # IBM拡張文字 (115区-119区) → NEC選定IBM拡張文字 (89区-92区) # my $re_sjis = "$RE{ASCII}|$RE{JIS_KANA}|$RE{SJIS_C}"; my $re_ibm = '[\xfa-\xfc][\x40-\x7e\x80-\xfc]'; sub ibm2nec { my $octet = shift; $octet =~ s/\G((?:$re_sjis)*?)($re_ibm)/$1 . _ibm2nec($2)/geo; return $octet; } sub _ibm2nec { my ($c1, $c2) = unpack('CC', shift); my $linear = $c1 * 188 + (($c2 < 0x7f) ? ($c2 - 0x40) : ($c2 - 0x41)); if (0xb7b4 <= $linear) { # IBM拡張漢字 $linear -= (0xb7b4 - 0xae0c); } elsif (0xb7ad <= $linear) { # BROKEN BAR など $linear -= (0xb7ad - 0xaf81); } else { # 小文字のローマ数字 $linear -= (0xb798 - 0xaf76); } $c1 = int($linear / 188); $c2 = $linear % 188 + 0x40; $c2++ if ($c2 > 0x7e); return pack('CC', ($c1, $c2)); } 1; __END__