hudeyong926 发表于 2013-1-19 04:14:24

Discuz字符编码转换类2

<?php
/**
 *       (C)2001-2099 Comsenz Inc.
 *      This is NOT a freeware, use is subject to license terms
 *
 *      $Id: class_chinese.php 6757 2010-03-25 09:01:29Z cnteacher $
 */

define('CODETABLE_DIR', DISCUZ_ROOT.'./source/include/table/');

/**
 * Character-set converter between GBK, BIG5, UTF-8 and "UNICODE"
 * (text in which every non-ASCII character is written as an HTML
 * "&#x....;" entity).
 *
 * Conversion goes through iconv() when ICONV_ENABLE is truthy, the target is
 * not BIG5 and table mode is not forced; otherwise it uses the binary lookup
 * tables stored under CODETABLE_DIR.
 */
class Chinese {

    /** Path of the lookup table selected by OpenTable(). */
    var $table = '';

    /** True when Convert() delegates to iconv(). */
    var $iconv_enabled = false;

    /** True when the caller asked for BIG5 and output must be post-processed by GB2312toBIG5(). */
    var $convertbig5 = false;

    /** Lookup table filled by OpenTable(); key/value direction depends on the language pair. */
    var $unicode_table = array();

    /** Normalised language codes and the table file names. */
    var $config = array(
        'SourceLang'          => '',
        'TargetLang'          => '',
        'GBtoUnicode_table'   => 'gb-unicode.table',
        'BIG5toUnicode_table' => 'big5-unicode.table',
        'GBtoBIG5_table'      => 'gb-big5.table',
    );

    /**
     * Set up a converter for one source/target pair.
     *
     * A PHP 4-style constructor alone is never invoked on PHP 8, which would
     * leave the object unconfigured, so the real work lives here.
     *
     * @param string $SourceLang Source charset name (matched by prefix, see _lang()).
     * @param string $TargetLang Target charset name (matched by prefix, see _lang()).
     * @param bool   $ForceTable Use the lookup tables even when iconv is available.
     */
    function __construct($SourceLang, $TargetLang, $ForceTable = FALSE) {
        $this->config['SourceLang'] = $this->_lang($SourceLang);
        $this->config['TargetLang'] = $this->_lang($TargetLang);

        if(ICONV_ENABLE && $this->config['TargetLang'] != 'BIG5' && !$ForceTable) {
            $this->iconv_enabled = true;
        } else {
            $this->iconv_enabled = false;
            $this->OpenTable();
        }
    }

    /**
     * Legacy PHP 4-style entry point, kept so existing explicit calls
     * ($obj->Chinese(...), parent::Chinese(...)) keep working.
     */
    function Chinese($SourceLang, $TargetLang, $ForceTable = FALSE) {
        $this->__construct($SourceLang, $TargetLang, $ForceTable);
    }

    /**
     * Normalise a charset name by prefix.
     *
     * @param string $LangCode e.g. 'gb2312', 'big5', 'utf8', 'unicode'.
     * @return string|null 'GBK', 'BIG5', 'UTF-8' or 'UNICODE'; null for any other prefix.
     */
    function _lang($LangCode) {
        $LangCode = strtoupper($LangCode);
        if(substr($LangCode, 0, 2) == 'GB') {
            return 'GBK';
        } elseif(substr($LangCode, 0, 3) == 'BIG') {
            return 'BIG5';
        } elseif(substr($LangCode, 0, 3) == 'UTF') {
            return 'UTF-8';
        } elseif(substr($LangCode, 0, 3) == 'UNI') {
            return 'UNICODE';
        }
        return null;
    }

    /**
     * Turn a string of hex digit pairs into the bytes they denote.
     *
     * @param string $hexdata Hex digits, consumed two at a time.
     * @return string Binary string ('' for empty input).
     */
    function _hex2bin($hexdata) {
        $hexdata = (string)$hexdata;
        $bindata = ''; // was appended to without being initialised
        $len = strlen($hexdata);
        for($i = 0; $i < $len; $i += 2) {
            $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
        }
        return $bindata;
    }

    /**
     * Remove a leading "0x" from a table entry.
     *
     * @param mixed $hex Table entry (string) or null.
     * @return string Bare hex digits.
     */
    function _stripHexPrefix($hex) {
        $hex = (string)$hex;
        if(strncasecmp($hex, '0x', 2) == 0) {
            return (string)substr($hex, 2);
        }
        return $hex;
    }

    /**
     * Integer value of a table entry.
     *
     * Entries are stored as '0x....' strings. Since PHP 7 such strings are no
     * longer numeric, so they must be decoded explicitly before arithmetic.
     *
     * @param mixed $hex Table entry, with or without the "0x" prefix.
     * @return int|float Decoded value; 0 for an empty/missing entry.
     */
    function _hexval($hex) {
        $hex = $this->_stripHexPrefix($hex);
        return $hex === '' ? 0 : hexdec($hex);
    }

    /**
     * Fetch a lookup-table entry without raising an undefined-index notice.
     *
     * @param int $index Table key.
     * @return string The stored entry, or '' when the key is absent.
     */
    function _tableEntry($index) {
        return isset($this->unicode_table[$index]) ? $this->unicode_table[$index] : '';
    }

    /**
     * Table key for a two-byte GBK/BIG5 character.
     *
     * The GBK table is keyed by the character code minus 0x8080 (the reverse
     * direction in _encodeTarget() adds 0x8080 back); BIG5 codes are used as is.
     *
     * @param string $pair The (up to) two bytes of the character.
     * @return int|float Table key.
     */
    function _tableIndex($pair) {
        $code = hexdec(bin2hex($pair));
        return $this->config['SourceLang'] == 'GBK' ? $code - 0x8080 : $code;
    }

    /**
     * Select and load the lookup table for the configured language pair.
     *
     * When iconv is off and the target is BIG5, the target is rewritten to GBK
     * and $convertbig5 is set so Convert() can post-process with GB2312toBIG5().
     * The table file is a sequence of 4-byte records: two big-endian 16-bit
     * integers, read here as "key" and "value".
     */
    function OpenTable() {
        $this->unicode_table = array();

        if(!$this->iconv_enabled && $this->config['TargetLang'] == 'BIG5') {
            $this->config['TargetLang'] = 'GBK';
            $this->convertbig5 = TRUE;
        }

        $source = $this->config['SourceLang'];
        $target = $this->config['TargetLang'];

        if($source == 'GBK' || $target == 'GBK') {
            $this->table = CODETABLE_DIR.$this->config['GBtoUnicode_table'];
        } elseif($source == 'BIG5' || $target == 'BIG5') {
            $this->table = CODETABLE_DIR.$this->config['BIG5toUnicode_table'];
        }

        // No table applies to this pair, or the file is unavailable: leave the
        // table empty instead of calling fread() on an invalid handle.
        if($this->table === '' || !is_readable($this->table)) {
            return;
        }
        $tabletmp = file_get_contents($this->table); // binary-safe, closes the file itself
        if($tabletmp === false) {
            return;
        }

        $len = strlen($tabletmp);
        // "+ 4 <=" skips a truncated trailing record rather than unpacking short data.
        for($i = 0; $i + 4 <= $len; $i += 4) {
            $tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
            if($target == 'UTF-8') {
                $this->unicode_table[$tmp['key']] = '0x'.dechex($tmp['value']);
            } elseif($source == 'UTF-8') {
                $this->unicode_table[$tmp['value']] = '0x'.dechex($tmp['key']);
            } elseif($target == 'UNICODE') {
                $this->unicode_table[$tmp['key']] = dechex($tmp['value']);
            }
        }
    }

    /**
     * Encode a code point as UTF-8, returned as concatenated DECIMAL byte values.
     *
     * The result is not a byte string: e.g. 0x4E2D yields "228184173". Callers
     * split it into 3-digit groups and chr() each one (see _tableToUtf8()).
     *
     * @param int $c Code point.
     * @return string Decimal byte values, '' for $c >= 0x200000.
     */
    function CHSUtoUTF8($c) {
        $str = '';
        if($c < 0x80) {
            $str .= $c;
        } elseif($c < 0x800) {
            $str .= (0xC0 | $c >> 6);
            $str .= (0x80 | $c & 0x3F);
        } elseif($c < 0x10000) {
            $str .= (0xE0 | $c >> 12);
            $str .= (0x80 | $c >> 6 & 0x3F);
            $str .= (0x80 | $c & 0x3F);
        } elseif($c < 0x200000) {
            $str .= (0xF0 | $c >> 18);
            $str .= (0x80 | $c >> 12 & 0x3F);
            $str .= (0x80 | $c >> 6 & 0x3F);
            $str .= (0x80 | $c & 0x3F);
        }
        return $str;
    }

    /**
     * Rewrite two-byte GB characters in place using the GB-to-BIG5 table file.
     *
     * @param string $c GB-encoded text.
     * @return string Text with every two-byte character replaced by the two
     *                bytes read from the table; returned unchanged when the
     *                table file cannot be opened.
     */
    function GB2312toBIG5($c) {
        $c = (string)$c;
        $f = fopen(CODETABLE_DIR.$this->config['GBtoBIG5_table'], 'rb');
        if($f === false) {
            return $c;
        }

        $max = strlen($c) - 1;
        for($i = 0; $i < $max; $i++) {
            $h = ord($c[$i]);
            if($h >= 160) {
                $l = ord($c[$i + 1]);
                if($h == 161 && $l == 64) {
                    // Two bytes are required because two offsets are overwritten below.
                    // NOTE(review): two ASCII spaces assumed as the placeholder - confirm.
                    $gb = '  ';
                } else {
                    fseek($f, ($h - 160) * 510 + ($l - 1) * 2);
                    $gb = fread($f, 2);
                }
                // Write the first and second byte separately; assigning the whole
                // string to both offsets would duplicate its first byte.
                if(is_string($gb) && strlen($gb) == 2) {
                    $c[$i]     = $gb[0];
                    $c[$i + 1] = $gb[1];
                }
                $i++;
            }
        }
        fclose($f);

        return $c;
    }

    /**
     * Convert text from the configured source charset to the target charset.
     *
     * @param string $SourceText Text in the source charset.
     * @return string|false Converted text (iconv() may return false on failure).
     */
    function Convert($SourceText) {
        $source = $this->config['SourceLang'];
        $target = $this->config['TargetLang'];

        if($source == $target) {
            // NOTE(review): for GBK -> BIG5 in table mode OpenTable() has already
            // rewritten the target to GBK, so this returns the text unconverted
            // and GB2312toBIG5() is never applied - confirm whether that is intended.
            return $SourceText;
        }

        if($this->iconv_enabled) {
            if($target != 'UNICODE') {
                return iconv($source, $target, $SourceText);
            }
            return $this->_iconvToEntities($SourceText);
        }

        if($target == 'UNICODE') {
            return $this->_tableToEntities($SourceText);
        }

        if($source == 'UTF-8') {
            $out = $this->_utf8ToTable($SourceText);
            return !$this->convertbig5 ? $out : $this->GB2312toBIG5($out);
        }

        return $this->_tableToUtf8($SourceText);
    }

    /**
     * iconv path for a UNICODE target: emit "&#x....;" for non-ASCII characters.
     *
     * Every byte above 127 is taken as the first of a two-byte character, which
     * matches GBK/BIG5 input but not multi-byte UTF-8 sequences.
     */
    function _iconvToEntities($SourceText) {
        $text = (string)$SourceText;
        $len = strlen($text);
        $out = '';
        $i = 0;
        while($i < $len) {
            if(ord($text[$i]) > 127) {
                $utf8 = iconv($this->config['SourceLang'], 'UTF-8', substr($text, $i, 2));
                $out .= '&#x'.dechex((int)$this->Utf8_Unicode((string)$utf8)).';';
                $i += 2;
            } else {
                $out .= $text[$i];
                $i++;
            }
        }
        return $out;
    }

    /** Table path for a UNICODE target: look each two-byte character up and emit "&#x....;". */
    function _tableToEntities($SourceText) {
        $text = (string)$SourceText;
        $source = $this->config['SourceLang'];
        $len = strlen($text);
        $out = '';
        $i = 0;
        while($i < $len) {
            if(ord($text[$i]) > 127) {
                if($source == 'GBK' || $source == 'BIG5') {
                    $out .= '&#x'.$this->_tableEntry($this->_tableIndex(substr($text, $i, 2))).';';
                }
                $i += 2;
            } else {
                $out .= $text[$i];
                $i++;
            }
        }
        return $out;
    }

    /** Table path for a UTF-8 source: decode 1-3 byte sequences and map them to the target. */
    function _utf8ToTable($SourceText) {
        $text = (string)$SourceText;
        $len = strlen($text);
        $out = '';
        $i = 0;
        while($i < $len) {
            $c = ord($text[$i++]);
            switch($c >> 4) {
                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                    // Plain ASCII is copied through.
                    $out .= chr($c);
                    break;
                case 12: case 13:
                    // Two-byte sequence; a missing trailing byte reads as 0.
                    $char2 = $i < $len ? ord($text[$i]) : 0;
                    $i++;
                    $out .= $this->_encodeTarget($this->_tableEntry((($c & 0x1F) << 6) | ($char2 & 0x3F)));
                    break;
                case 14:
                    // Three-byte sequence.
                    $char2 = $i < $len ? ord($text[$i]) : 0;
                    $i++;
                    $char3 = $i < $len ? ord($text[$i]) : 0;
                    $i++;
                    $out .= $this->_encodeTarget($this->_tableEntry((($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | ($char3 & 0x3F)));
                    break;
                // Other lead bytes (continuation bytes, 4-byte sequences) are dropped.
            }
        }
        return $out;
    }

    /**
     * Bytes of one table entry in the target charset.
     *
     * @param string $entry '0x....' table entry, or '' when the lookup missed.
     * @return string Encoded bytes; '' when the target is neither GBK nor BIG5.
     */
    function _encodeTarget($entry) {
        if($this->config['TargetLang'] == 'GBK') {
            // The GBK table stores codes offset by 0x8080.
            return $this->_hex2bin(dechex($this->_hexval($entry) + 0x8080));
        } elseif($this->config['TargetLang'] == 'BIG5') {
            return $this->_hex2bin($this->_stripHexPrefix($entry));
        }
        return '';
    }

    /** Table path for a GBK/BIG5 source and UTF-8 target. */
    function _tableToUtf8($SourceText) {
        $text = (string)$SourceText;
        $source = $this->config['SourceLang'];
        $len = strlen($text);
        $ret = '';
        $i = 0;
        while($i < $len) {
            if(ord($text[$i]) > 127) {
                $digits = '';
                if($source == 'BIG5' || $source == 'GBK') {
                    $entry = $this->_tableEntry($this->_tableIndex(substr($text, $i, 2)));
                    $digits = $this->CHSUtoUTF8($this->_hexval($entry));
                }
                // CHSUtoUTF8() returns decimal byte values; multi-byte results are
                // always three digits per byte (128-255).
                $dlen = strlen($digits);
                for($j = 0; $j < $dlen; $j += 3) {
                    $ret .= chr((int)substr($digits, $j, 3));
                }
                $i += 2;
            } else {
                $ret .= $text[$i];
                $i++;
            }
        }
        return $ret;
    }

    /**
     * Decode one UTF-8 encoded character into its code point.
     *
     * @param string $char A single character of 1 to 4 bytes.
     * @return int|null Code point; null when the length is not 1-4.
     */
    function Utf8_Unicode($char) {
        switch(strlen($char)) {
            case 1:
                return ord($char);
            case 2:
                // Each byte contributes its own bits; reading byte 0 for every
                // position would yield a wrong code point.
                $n = (ord($char[0]) & 0x3f) << 6;
                $n += ord($char[1]) & 0x3f;
                return $n;
            case 3:
                $n = (ord($char[0]) & 0x1f) << 12;
                $n += (ord($char[1]) & 0x3f) << 6;
                $n += ord($char[2]) & 0x3f;
                return $n;
            case 4:
                $n = (ord($char[0]) & 0x0f) << 18;
                $n += (ord($char[1]) & 0x3f) << 12;
                $n += (ord($char[2]) & 0x3f) << 6;
                $n += ord($char[3]) & 0x3f;
                return $n;
        }
        return null;
    }
}
?>转化类用的数据库source/include/table  gb-unicode.table
            // Usage example: build a converter from UTF-8 to the charset named by the
            // CHARSET constant (defined elsewhere). Passing TRUE as the third argument
            // ($ForceTable) makes the class use its lookup tables instead of iconv.
            $c = new Chinese('utf8', CHARSET, TRUE);
            // Convert $data and overwrite it with the converted text.
            $data = $c->Convert($data);
页: [1]
查看完整版本: Discuz字符编码转换类2