| [ Index ] |
PHP Cross Reference of Akelos Framework |
[Summary view] [Print] [Text view]
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

// +----------------------------------------------------------------------+
// | Akelos Framework - http://www.akelos.org                             |
// +----------------------------------------------------------------------+
// | Copyright (c) 2002-2006, Akelos Media, S.L.  & Bermi Ferrer Martinez |
// | Released under the GNU Lesser General Public License, see LICENSE.txt|
// +----------------------------------------------------------------------+

/**
 * @package ActiveSupport
 * @subpackage I18n-L10n
 * @author Bermi Ferrer <bermi a.t akelos c.om>
 * @copyright Copyright (c) 2002-2006, Akelos Media, S.L. http://www.akelos.org
 * @license GNU Lesser General Public License <http://www.gnu.org/copyleft/lesser.html>
 */


/**
 * Charset conversion using UTF8 mapping tables.
 *
 * Charset conversion using 4 different methods: pure PHP conversion
 * or one of the PHP extensions iconv, recode and mbstring.
 *
 * Supported charsets are:
 * ASCII, ISO 8859-1, ISO 8859-2, ISO 8859-3, ISO 8859-4, ISO
 * 8859-5, ISO 8859-6, ISO 8859-7, ISO 8859-8, ISO 8859-9, ISO
 * 8859-10, ISO 8859-11, ISO 8859-13, ISO 8859-14, ISO 8859-15,
 * ISO 8859-16, CP437, CP737, CP850, CP852, CP855, CP857,
 * CP858, CP860, CP861, CP863, CP865, CP866, CP869,
 * Windows-1250, Windows-1251, Windows-1252, Windows-1253,
 * Windows-1254, Windows-1255, Windows-1256, Windows-1257,
 * Windows-1258, KOI8-R, KOI8-U, ISCII, VISCII, Big5, HKSCS,
 * GB2312, GB18030, Shift-JIS, EUC
 *
 * More information about charsets at
 * http://en.wikipedia.org/wiki/Character_encoding
 *
 * @author Bermi Ferrer <bermi@akelos.org>
 * @copyright Copyright (c) 2002-2005, Akelos Media, S.L. http://www.akelos.org
 * @license GNU Lesser General Public License <http://www.gnu.org/copyleft/lesser.html>
 * @since 0.1
 * @version $Revision 0.1 $
 */
class AkCharset
{

    // ------ CLASS ATTRIBUTES ------ //

    // ---- Public attributes ---- //

    /**
     * Allow charset recoding.
     *
     * @access public
     * @var bool $enableCharsetRecoding
     */
    var $enableCharsetRecoding = true;

    /**
     * Allow or disallow PHP based charset conversion.
     *
     * @access public
     * @var boolean $usePhpRecoding
     */
    var $usePhpRecoding = true;

    /**
     * Default charset, used whenever a null/empty charset name is given.
     *
     * @access public
     * @var string $defaultCharset
     */
    var $defaultCharset = 'ISO-8859-1';

    /**
     * UTF-8 error character
     *
     * Char that will be used when no matches are found on the UTF8
     * mapping table
     *
     * @access public
     * @var string $utf8ErrorChar
     */
    var $utf8ErrorChar = '?';


    // ---- Private attributes ---- //

    /**
     * Current encoding engine
     *
     * @see GetRecodingEngine
     * @see SetRecodingEngine
     * @access private
     * @var string $_recodingEngine
     */
    var $_recodingEngine = null;

    /**
     * Extra parameters for invoking the encoding engine (useful
     * for iconv)
     *
     * @see GetRecodingEngineExtraParams
     * @see SetRecodingEngineExtraParams
     * @access private
     * @var string $_recodingEngineExtraParams
     */
    var $_recodingEngineExtraParams = '';

    /**
     * Holds the charset currently being processed.
     *
     * @see GetCurrentCharset
     * @access private
     * @var string $_currentCharset
     */
    var $_currentCharset = 'ISO-8859-1';


    // ------ CLASS METHODS ------ //

    // ---- Getters ---- //

    /**
     * $this->_recodingEngine getter
     *
     * @access public
     * @see SetRecodingEngine
     * @return string Returns current encoding engine value.
     */
    function GetRecodingEngine()
    {
        return $this->_recodingEngine;
    }

    /**
     * $this->_recodingEngineExtraParams getter
     *
     * @access public
     * @see SetRecodingEngineExtraParams
     * @return string Returns the extra parameters used when invoking the
     * encoding engine (useful for iconv).
     */
    function GetRecodingEngineExtraParams()
    {
        return $this->_recodingEngineExtraParams;
    }

    /**
     * $this->_currentCharset getter
     *
     * @access public
     * @see _GetCharset
     * @return string Returns the charset currently being processed.
     */
    function GetCurrentCharset()
    {
        return $this->_currentCharset;
    }


    // ---- Setters ---- //

    /**
     * Sets the default recoding engine.
     *
     * The detection result is memoized in a function-static cache. Instance
     * state ($_recodingEngine and extra params) is updated on every call,
     * including cache hits, so that every instance stays in sync with the
     * value returned.
     *
     * @access public
     * @uses _LoadExtension
     * @param string $engine Possible engines are:
     * - iconv (http://php.net/iconv)
     * - mbstring (http://php.net/mb_convert_encoding)
     * - recode (http://php.net/recode_string)
     * When empty, the first available engine is selected.
     * @param string $extra_params Extra parameters for invoking the encoding engine
     * (useful for iconv)
     * @return mixed Name of current recoding engine, or false if none is available
     */
    function SetRecodingEngine($engine = null, $extra_params = null)
    {
        static $memory = array();

        // Separator avoids collisions such as (null, 'iconv') vs ('iconv', null).
        $memory_key = $engine.'|'.$extra_params;

        if(isset($extra_params)){
            $this->_recodingEngineExtraParams = $extra_params;
        }

        if(isset($memory[$memory_key])){
            // Cache hit: still reflect the engine on this instance.
            $this->_recodingEngine = $memory[$memory_key];
            return $memory[$memory_key];
        }

        $engines = array('iconv'=>'iconv','mbstring'=>'mb_convert_encoding','recode'=>'recode_string');
        $this->_recodingEngine = false;

        // Fix for systems with constant iconv defined. Php uses libiconv function instead
        if (!function_exists('iconv') && function_exists('libiconv')) {
            function iconv($input_encoding, $output_encoding, $string) {
                return libiconv($input_encoding, $output_encoding, $string);
            }
        }

        if(empty($engine)){
            // Auto-detect: take the first engine whose function is (or can be made) available.
            foreach ($engines as $_engine=>$function){
                if(function_exists($function) || ($this->_LoadExtension($_engine) && function_exists($function))){
                    $this->_recodingEngine = $_engine;
                    break;
                }
            }
        }elseif (isset($engines[$engine])){
            if(!function_exists($engines[$engine])){
                user_error(Ak::t('Could not set AkCharset::SetRecodingEngine("%engine");',array('%engine'=>$engine)),E_USER_NOTICE);
            }else{
                $this->_recodingEngine = $engine;
            }
        }

        $memory[$memory_key] = $this->_recodingEngine;
        return $this->_recodingEngine;
    }


    /**
     * $this->_recodingEngineExtraParams setter
     *
     * @access public
     * @see GetRecodingEngineExtraParams
     * @param string $recoding__engine__extra__params Extra parameters for invoking the encoding engine
     * (useful for iconv)
     * @return void
     */
    function SetRecodingEngineExtraParams($recoding__engine__extra__params)
    {
        $this->_recodingEngineExtraParams = $recoding__engine__extra__params;
    }


    // ---- Public methods ---- //

    /**
     * Changes the charset encoding of one string to other charset.
     *
     * This function wraps 3 non standard PHP extensions plus an
     * additional pure PHP conversion utility for systems that do
     * not have these extensions enabled.
     *
     * @access public
     * @param string $string String to recode. Non-string values are returned untouched.
     * @param string $target_charset Target charset. Availability may vary
     * depending on your system configuration.
     * @param string $origin_charset Input string charset. Availability may
     * vary depending on your system configuration.
     * This parameter is optional if you are using
     * multibyte extension.
     * @param string $engine Possible engines are:
     * - iconv (http://php.net/iconv)
     * - mbstring (http://php.net/mb_convert_encoding)
     * - recode (http://php.net/recode_string)
     * @param string $engine_extra_params Extra parameters for invoking the encoding engine
     * (useful for iconv)
     * @return string Recoded string if possible, otherwise it will
     * return the string without modifications.
     */
    function RecodeString($string, $target_charset, $origin_charset = null, $engine = null, $engine_extra_params = null)
    {
        static $memory;
        if(!is_string($string)){
            return $string;
        }
        if($this->enableCharsetRecoding == false || $target_charset==$origin_charset){
            return $string;
        }
        // The engine is resolved once and remembered unless the caller forces one.
        if(isset($engine) || !isset($memory['engine'])){
            $engine = $memory['engine'] = $this->SetRecodingEngine($engine,$engine_extra_params);
        }else{
            $engine = $memory['engine'];
        }
        if(!$engine && !$this->usePhpRecoding){
            return $string;
        }
        // e.g. 'iconv' => _IconvStringRecode; no engine => pure PHP implementation.
        $method = $engine ? '_'.ucfirst($engine).'StringRecode' : '_PhpStringRecode';

        if(method_exists($this,$method)){
            return $this->$method($string, $target_charset, $origin_charset, $engine_extra_params);
        }
        user_error(Ak::t('Could not invoque AkCharset::%method();',array('%method'=>$method)),E_USER_NOTICE);
        return $string;
    }

    /**
     * Fetch an array with UTF8 charset equivalence table.
     *
     * @access public
     * @uses _LoadInverseMap
     * @uses _GetCharset
     * @uses _charsetMapFileExists
     * @param string $charset Desired charset
     * @return mixed Multilevel array with selected mapping:
     * array(
     * 'to_utf' => array(CHARS_VAL=>UTF_VAL),
     * 'from_utf' => array(UTF_VAL=>CHARS_VAL)
     * );
     *
     * False if mapping is not found.
     */
    function GetMapping($charset)
    {
        $charset = $this->_GetCharset($charset,false);
        // $charset comes from the internal alias whitelist, so it is safe to use in a path.
        if($charset!=false && $this->_charsetMapFileExists($charset)){
            include_once(AK_LIB_DIR.DS.'AkCharset'.DS.'utf8_mappings'.DS.$charset.'.php');
            if(class_exists($charset)){
                $mapping = array();
                $mappingObject =& Ak::singleton($charset,$charset);
                $mapping["to_utf"] = $mappingObject->_toUtfMap;
                $mappingObject->_LoadInverseMap();
                $mapping["from_utf"] = $mappingObject->_fromUtfMap;

                return $mapping;
            }
        }
        return false;
    }


    // ---- Private methods ---- //

    /**
     * Tries to load required extension.
     *
     * @access private
     * @see SetRecodingEngine
     * @param string $extension Extension name
     * @return boolean Returns true on success false on failure.
     */
    function _LoadExtension($extension)
    {
        static $memory = array();
        if(!isset($memory[$extension])){
            if(extension_loaded($extension)){
                $memory[$extension] = true;
            }elseif(function_exists('dl') && !ini_get('safe_mode')){
                // PHP_SHLIB_SUFFIX has no leading dot ('so' / 'dll'), so it must be added here.
                $prefix = (PHP_SHLIB_SUFFIX == 'dll') ? 'php_' : '';
                $memory[$extension] = (bool)@dl($prefix.$extension.'.'.PHP_SHLIB_SUFFIX);
            }else{
                // dl() is not available on every SAPI/PHP version.
                $memory[$extension] = false;
            }
        }
        return $memory[$extension];
    }

    /**
     * AkCharset::RecodeString() iconv implementation
     *
     * @access private
     * @see RecodeString
     * @param string $string String to recode
     * @param string $target_charset Charset to convert to
     * @param string $origin_charset Charset of $string; defaults to $this->defaultCharset when empty
     * @param string $engine_extra_params Suffix appended to the target charset (e.g. //TRANSLIT)
     * @return string Recoded string if possible, otherwise the result of
     * the pure PHP implementation.
     */
    function _IconvStringRecode($string, $target_charset, $origin_charset, $engine_extra_params=null)
    {
        if(!$this->_ConversionIsNeeded($origin_charset, $target_charset) && !$this->isUtf8($string)){
            return $string;
        }

        // These pairs are always handled by the PHP implementation.
        $skip_combinations = array('ISO-8859-1.UTF-8', 'UTF-8.ISO-8859-1');
        if(in_array($target_charset.'.'.$origin_charset, $skip_combinations)){
            return $this->_PhpStringRecode($string, $target_charset, $origin_charset);
        }

        $engine_extra_params = isset($engine_extra_params) ? $engine_extra_params : $this->_recodingEngineExtraParams;
        $from_charset = empty($origin_charset) ? $this->defaultCharset : $origin_charset;

        // iconv() signature is (from, to, string).
        $result = @iconv($from_charset, $target_charset.$engine_extra_params, $string);

        // Only a strict false means failure; '' and '0' are valid results.
        if($result === false){
            return $this->_PhpStringRecode($string, $target_charset, $origin_charset);
        }
        return $result;
    }


    /**
     * AkCharset::RecodeString() recode_string implementation
     *
     * @access private
     * @see RecodeString
     * @param string $string String to recode
     * @param string $target_charset Charset to convert to
     * @param string $origin_charset Charset of $string; defaults to $this->defaultCharset when empty
     * @return string Recoded string if possible, otherwise it will
     * return the string without modifications.
     */
    function _RecodeStringRecode($string, $target_charset, $origin_charset)
    {
        $from_charset = empty($origin_charset) ? $this->defaultCharset : $origin_charset;

        // recode_string() takes a single request of the form "from..to" plus the string.
        $result = @recode_string($from_charset.'..'.$target_charset, $string);

        return is_string($result) ? $result : $string;
    }

    /**
     * AkCharset::RecodeString() mb_convert_encoding implementation
     *
     * @access private
     * @see RecodeString
     * @param string $string String to recode
     * @param string $target_charset Charset to convert to
     * @param string $origin_charset Charset of $string; auto-detected with
     * mb_detect_encoding() when empty
     * @return string Recoded string if possible, otherwise it will
     * return the string without modifications.
     */
    function _MbstringStringRecode($string, $target_charset, $origin_charset=null)
    {
        if(empty($origin_charset)){
            $origin_charset = mb_detect_encoding($string);
            if(empty($origin_charset)){
                // Nothing to convert from.
                return $string;
            }
        }elseif(!$this->_ConversionIsNeeded($origin_charset, $target_charset) && !$this->isUtf8($string)){
            return $string;
        }

        // Fall back to the PHP implementation when mbstring does not know one of the charsets.
        if(!@mb_check_encoding('', $origin_charset) || !@mb_check_encoding('', $target_charset)){
            return $this->_PhpStringRecode($string, $target_charset, $origin_charset);
        }
        return mb_convert_encoding($string, $target_charset, $origin_charset);
    }

    /**
     * AkCharset::RecodeString() Pure PHP implementation
     *
     * Conversions always pivot on UTF-8: origin => utf8 => target. The
     * actual work is delegated to the mapping class found under
     * AkCharset/utf8_mappings/ for the non-UTF-8 charset.
     *
     * @access private
     * @uses _Utf8StringEncode
     * @uses _Utf8StringDecode
     * @uses _charsetMapFileExists
     * @see RecodeString
     * @return string Recoded string if possible, otherwise it will
     * return the string without modifications.
     */
    function _PhpStringRecode($string, $target_charset, $origin_charset)
    {
        $target_charset = $this->_GetCharset($target_charset, false);
        $origin_charset = $this->_GetCharset($origin_charset, false);

        if((!$target_charset || !$origin_charset) || ((!$this->_ConversionIsNeeded($origin_charset, $target_charset) || !$this->usePhpRecoding) && !$this->isUtf8($string))){
            return $string;
        }

        if($origin_charset != 'utf8' && $target_charset != 'utf8'){
            // Two-step conversion through UTF-8.
            $utf8String = $this->_PhpStringRecode($string,'utf8',$origin_charset);
            return $this->_PhpStringRecode($utf8String,$target_charset,'utf8');
        }

        if($origin_charset == 'utf8'){
            $map_charset = $target_charset;
            $method = '_Utf8StringDecode';
        }else{
            $map_charset = $origin_charset;
            $method = '_Utf8StringEncode';
        }

        if(!$this->_charsetMapFileExists($map_charset)){
            return $string;
        }
        include_once(AK_LIB_DIR.DS.'AkCharset'.DS.'utf8_mappings'.DS.$map_charset.'.php');
        if(!class_exists($map_charset)){
            return $string;
        }

        $mappingObject =& Ak::singleton($map_charset, $map_charset);
        if(method_exists($mappingObject, $method)){
            return $mappingObject->$method($string);
        }
        return $string;
    }


    /**
     * Checks for possibility or need of charset conversion.
     *
     * @access private
     * @uses _GetCharset
     * @param string $origin_charset
     * @param string $target_charset
     * @return boolean True when one side is UTF-8 or both charsets belong
     * to the same group of similar charsets.
     */
    function _ConversionIsNeeded($origin_charset, $target_charset)
    {
        $target_charset = $this->_GetCharset($target_charset,false);
        $origin_charset = $this->_GetCharset($origin_charset,false);

        if(($origin_charset==$target_charset)||!$target_charset||!$origin_charset){
            return false;
        }

        if($origin_charset == 'utf8' || $target_charset == 'utf8'){
            return true;
        }

        // Names must be the internal ones returned by _GetCharset (e.g. 'iso_8859_13').
        $similar_charsets = array(
        array('cp1257','iso_8859_13','iso_8859_4'),
        array('koi8_u','cp1251','iso_8859_5','koi8_r')
        );

        foreach ($similar_charsets as $group){
            if(in_array($origin_charset,$group,true) && in_array($target_charset,$group,true)){
                return true;
            }
        }
        return false;
    }

    /**
     * Filters input charset and returns a custom formatted value
     * for class wide usage.
     *
     * Lookups are memoized per requested name; the $set_charset side effect
     * is applied on cache hits too.
     *
     * @access private
     * @param string $charset Charset name. $this->defaultCharset is used when empty.
     * @param boolean $set_charset If true will set $this->_currentCharset value
     * @return mixed AkCharset internal name or FALSE if charset is not
     * found.
     */
    function _GetCharset($charset = null, $set_charset = true)
    {
        static $memory = array();

        // Resolve the default first so the cache key reflects the charset actually looked up.
        $requested = $charset == null ? $this->defaultCharset : $charset;

        if(!isset($memory[$requested])){
            $procesed_charset = str_replace(array('-','_','.',' '),'',strtolower(trim($requested)));
            $procesed_charset = str_replace(array('windows','ibm'),'cp',$procesed_charset);
            $alias_xref = array('437'=>'cp437','850'=>'cp850','852'=>'cp852','855'=>'cp855','857'=>'cp857',
            '860'=>'cp860','861'=>'cp861','862'=>'cp862','863'=>'cp863','865'=>'cp865','866'=>'cp866','869'=>'cp869',
            'ansix341968'=>'ascii','ansix341986'=>'ascii','arabic'=>'iso88596','asmo708'=>'iso88596','big5cp950'=>'big5',
            'cp367'=>'ascii','cp819'=>'iso88591','cpgr'=>'cp869','cpis'=>'cp861','csascii'=>'ascii','csbig5'=>'big5',
            'cscp855'=>'cp855','cscp857'=>'cp857','cscp860'=>'cp860','cscp861'=>'cp861','cscp863'=>'cp863','cscp864'=>'cp864',
            'cscp865'=>'cp865','cscp866'=>'cp866','cscp869'=>'cp869','cseuckr'=>'euckr','cseucpkdfmtjapanese'=>'eucjp',
            'csgb2312'=>'gb18030','csisolatin1'=>'iso88591','csisolatin2'=>'iso88592','csisolatin3'=>'iso88593',
            'csisolatin4'=>'iso88594','csisolatin5'=>'iso88599','csisolatinarabic'=>'iso88596',
            'csisolatincyrillic'=>'iso88595','csisolatingreek'=>'iso88597','csisolatinhebrew'=>'iso88598','cskoi8r'=>'koi8r',
            'cspc850multilingual'=>'cp850','cspc862latinhebrew'=>'cp862','cspc8codepage437'=>'cp437','cspcp852'=>'cp852',
            'csshiftjis'=>'shiftjis','cyrillic'=>'iso88595','ecma114'=>'iso88596','ecma118'=>'iso88597','elot928'=>'iso88597',
            'extendedunixcodepackedformatforjapanese'=>'eucjp','gb2312'=>'gb18030','greek'=>'iso88597','greek8'=>'iso88597',
            'hebrew'=>'iso88598','hkscsbig5'=>'big5hkscs','iso646irv:1991'=>'ascii','iso646us'=>'ascii',
            'iso885914:1998'=>'iso885914','iso88591:1987'=>'iso88591','iso88592:1987'=>'iso88592','iso88593:1988'=>'iso88593',
            'iso88594:1988'=>'iso88594','iso88595:1988'=>'iso88595','iso88596:1987'=>'iso88596','iso88597:1987'=>'iso88597',
            'iso88598:1988'=>'iso88598','iso88599:1989'=>'iso88599','isoceltic'=>'iso885914','isoir100'=>'iso88591',
            'isoir101'=>'iso88592','isoir109'=>'iso88593','isoir110'=>'iso88594','isoir126'=>'iso88597','isoir127'=>'iso88596',
            'isoir138'=>'iso88598','isoir144'=>'iso88595','isoir148'=>'iso88599','isoir166'=>'tis620','isoir179'=>'iso885913',
            'isoir199'=>'iso885914','isoir226'=>'iso885916','isoir6'=>'ascii','l1'=>'iso88591','l10'=>'iso885916','l2'=>'iso88592',
            'l3'=>'iso88593','l4'=>'iso88594','l5'=>'iso88599','l7'=>'iso885913','l8'=>'iso885914','latin1'=>'iso88591',
            'latin10'=>'iso885916','latin2'=>'iso88592','latin3'=>'iso88593','latin4'=>'iso88594','latin5'=>'iso88599',
            'latin7'=>'iso885913','latin8'=>'iso885914','mscyrl'=>'cp1251','mshebr'=>'cp1255','mskanji'=>'shiftjis',
            'sjis'=>'shiftjis','tcabig5'=>'big5','tis6200'=>'tis620','tis62025291'=>'tis620','tis62025330'=>'tis620',
            'us'=>'ascii','usascii'=>'ascii');
            $alias = array(
            'armscii8'=>'armscii_8','ascii'=>'ascii','big5hkscs'=>'big5_hkscs','utf8'=>'utf8',
            'big5'=>'big5','cp1046'=>'cp1046','cp1124'=>'cp1124','cp1125'=>'cp1125','cp1129'=>'cp1129',
            'cp1133'=>'cp1133','cp1161'=>'cp1161','cp1162'=>'cp1162','cp1163'=>'cp1163','cp1250'=>'cp1250',
            'cp1251'=>'cp1251','cp1252'=>'cp1252','cp1253'=>'cp1253','cp1254'=>'cp1254','cp1255'=>'cp1255',
            'cp1256'=>'cp1256','cp1257'=>'cp1257','cp1258'=>'cp1258','cp437'=>'cp437','cp737'=>'cp737',
            'cp775'=>'cp775','cp850'=>'cp850','cp852'=>'cp852','cp853'=>'cp853','cp855'=>'cp855','cp856'=>'cp856',
            'cp857'=>'cp857','cp858'=>'cp858','cp860'=>'cp860','cp861'=>'cp861','cp862'=>'cp862','cp863'=>'cp863',
            'cp864'=>'cp864','cp865'=>'cp865','cp866'=>'cp866','cp869'=>'cp869','cp874'=>'cp874','cp922'=>'cp922',
            'cp932'=>'cp932','cp949'=>'cp949','cp950'=>'cp950','dechanyu'=>'dec_hanyu','deckanji'=>'dec_kanji',
            'euccn'=>'euc_cn','eucjisx0213'=>'euc_jisx0213','eucjp'=>'euc_jp','euckr'=>'euc_kr','euctw'=>'euc_tw',
            'gb18030'=>'gb18030','gbk'=>'gbk','georgianacademy'=>'georgian_academy','georgianps'=>'georgian_ps',
            'hproman8'=>'hp_roman8','iso88591'=>'iso_8859_1','iso885910'=>'iso_8859_10','iso885913'=>'iso_8859_13',
            'iso885914'=>'iso_8859_14','iso885915'=>'iso_8859_15','iso885916'=>'iso_8859_16','iso88592'=>'iso_8859_2',
            'iso88593'=>'iso_8859_3','iso88594'=>'iso_8859_4','iso88595'=>'iso_8859_5','iso88596'=>'iso_8859_6',
            'iso88597'=>'iso_8859_7','iso88598'=>'iso_8859_8','iso88599'=>'iso_8859_9','isoir165'=>'iso_ir_165',
            'iso646cn'=>'iso646_cn','iso646jp'=>'iso646_jp','jisx0201'=>'jis_x0201','johab'=>'johab','koi8r'=>'koi8_r',
            'koi8ru'=>'koi8_ru','koi8t'=>'koi8_t','koi8u'=>'koi8_u','macarabic'=>'macarabic',
            'maccentraleurope'=>'maccentraleurope','maccroatian'=>'maccroatian','maccyrillic'=>'maccyrillic',
            'macgreek'=>'macgreek','machebrew'=>'machebrew','maciceland'=>'maciceland','macroman'=>'macroman',
            'macromania'=>'macromania','macthai'=>'macthai','macturkish'=>'macturkish','macukraine'=>'macukraine',
            'mulelao1'=>'mulelao_1','nextstep'=>'nextstep','riscoslatin1'=>'riscos_latin1','shiftjis'=>'shift_jis',
            'shiftjisx0213'=>'shift_jisx0213','tcvn'=>'tcvn','tds565'=>'tds565','tis620'=>'tis_620','viscii'=>'viscii',
            'iso885911'=>'iso_8859_11', 'jis0228' => 'jis_0228', 'jis0212' => 'jis_0212'
            );
            // First collapse well-known aliases, then map to the internal (file/class) name.
            $procesed_charset = isset($alias_xref[$procesed_charset]) ? $alias_xref[$procesed_charset] : $procesed_charset;
            $memory[$requested] = isset($alias[$procesed_charset]) ? $alias[$procesed_charset] : false;
        }

        if($set_charset){
            $this->_currentCharset = $memory[$requested];
        }

        return $memory[$requested];
    }

    /**
     * Encodes given string as UTF8 text.
     *
     * Given string and charset mapping, returns input string as
     * UTF8 text. Bytes without a mapping entry are passed through when
     * they are plain ASCII and replaced by $this->utf8ErrorChar otherwise.
     *
     * @access private
     * @uses _CharToUtf8
     * @see _PhpStringRecode
     * @see _Utf8StringDecode
     * @param string $string Text to be converted to UTF8
     * @param array $mapping_array Array containing the charset mapping
     * (byte value => Unicode code point).
     * @return string UTF8 String
     */
    function _Utf8StringEncode($string, $mapping_array)
    {
        if($string === ''){
            return '';
        }
        $result = '';
        // unpack('C*') yields a 1-based array of byte values.
        foreach (unpack('C*', $string) as $byte){
            if(isset($mapping_array[$byte])){
                $result .= $this->_CharToUtf8((int)$mapping_array[$byte]);
            }elseif($byte < 0x80){
                $result .= chr($byte);
            }else{
                // A raw high byte would produce invalid UTF-8.
                $result .= $this->utf8ErrorChar;
            }
        }
        return $result;
    }

    /**
     * Decodes data, assumed to be UTF-8 encoded given its
     * equivalence map.
     *
     * @access private
     * @uses _Utf8ToChar
     * @see _PhpStringRecode
     * @see _Utf8StringEncode
     * @param string $utf_string UTF8 string
     * @param array $mapping_array Mapping array (Unicode code point => byte value)
     * @return string Decoded string
     */
    function _Utf8StringDecode($utf_string, $mapping_array)
    {
        if($utf_string === ''){
            return '';
        }
        $chars = unpack('C*', $utf_string);
        $count = count($chars);
        $result = '';
        // $i is advanced by reference inside _Utf8ToChar for multi-byte sequences.
        for ($i=1;$i<=$count;$i++){
            $result .= $this->_Utf8ToChar($chars,$i,$mapping_array);
        }
        return $result;
    }


    /**
     * Converts a single character to its UTF8 representation
     *
     * @access protected
     * @see _Utf8StringEncode
     * @param integer $char Unicode code point to be converted
     * @return string UTF8 char, or $this->utf8ErrorChar when the code point
     * is outside the encodable range.
     */
    function _CharToUtf8($char)
    {
        if($char < 0x80){
            return chr($char);
        }
        // 2 bytes
        if($char < 0x800){
            return chr(0xC0 | $char>>6) . chr(0x80 | $char & 0x3F);
        }
        // 3 bytes
        if($char < 0x10000){
            return chr(0xE0 | $char>>12) . chr(0x80 | $char>>6 & 0x3F) . chr(0x80 | $char & 0x3F);
        }
        // 4 bytes
        if($char < 0x200000){
            return chr(0xF0 | $char>>18) . chr(0x80 | $char>>12 & 0x3F) . chr(0x80 | $char>>6 & 0x3F) . chr(0x80 | $char & 0x3F);
        }
        return $this->utf8ErrorChar;
    }


    /**
     * Decodes a single UTF8 char to its representation as
     * specified in the mapping array
     *
     * @access private
     * @see _Utf8StringDecode
     * @param array $chars 1-based array of byte values to be decoded
     * @param integer &$id Current char position; left on the last byte consumed
     * @param array $mapping_array Mapping Array (Unicode code point => byte value)
     * @return string Decoded char, or $this->utf8ErrorChar when the sequence
     * is truncated or has no mapping.
     */
    function _Utf8ToChar($chars, &$id, $mapping_array)
    {
        $lead = $chars[$id];
        if($lead >= 240){
            $continuation_bytes = 3;
            $utf = $lead - 240;
        }elseif($lead >= 224){
            $continuation_bytes = 2;
            $utf = $lead - 224;
        }elseif($lead >= 192){
            $continuation_bytes = 1;
            $utf = $lead - 192;
        }else{
            $continuation_bytes = 0;
            $utf = $lead;
        }

        for($n = 0; $n < $continuation_bytes; $n++){
            if(!isset($chars[$id+1])){
                // Sequence is cut short at the end of the input.
                return $this->utf8ErrorChar;
            }
            $utf = ($utf << 6) + ($chars[++$id] - 128);
        }

        if(array_key_exists($utf,$mapping_array)){
            return chr($mapping_array[$utf]);
        }
        return $this->utf8ErrorChar;
    }


    /**
     * Tells whether $text is well-formed UTF-8 (printable subset).
     *
     * @access public
     * @param string $text Text to check
     * @return mixed preg_match() result: 1 on match, 0 otherwise, false on PCRE error.
     */
    function isUtf8($text = '')
    {
        // From http://w3.org/International/questions/qa-forms-utf-8.html
        return preg_match('%^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$%xs', $text);
    }

    /**
     * Checks that the UTF8 mapping file for $charset is installed,
     * raising an E_USER_NOTICE when it is not.
     *
     * @access private
     * @param string $charset Internal charset name as returned by _GetCharset
     * @return boolean
     */
    function _charsetMapFileExists($charset)
    {
        if(!file_exists(AK_LIB_DIR.DS.'AkCharset'.DS.'utf8_mappings'.DS.$charset.'.php')){
            // NOTE(review): the message points at lib/AkActionView while the path checked is
            // AkCharset/utf8_mappings; left untouched because the string is a translation key.
            trigger_error(Ak::t('Charset %charset is not supported on your current setting. Please download aditional charset maps from http://svn.akelos.org/extras/utf8_mappings/ into lib/AkActionView/utf8_mappings', array('%charset'=>$charset)), E_USER_NOTICE);
            return false;
        }
        return true;
    }
}

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Mon Oct 27 12:43:49 2008 | Cross-referenced by PHPXref 0.6 |