[ Index ]

PHP Cross Reference of Akelos Framework

title

Body

[close]

/ -> AkCharset.php (source)

   1  <?php
   2  /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
   3  
   4  // +----------------------------------------------------------------------+
   5  // | Akelos Framework - http://www.akelos.org                             |
   6  // +----------------------------------------------------------------------+
   7  // | Copyright (c) 2002-2006, Akelos Media, S.L.  & Bermi Ferrer Martinez |
   8  // | Released under the GNU Lesser General Public License, see LICENSE.txt|
   9  // +----------------------------------------------------------------------+
  10  
  11  /**
  12   * @package ActiveSupport
  13   * @subpackage I18n-L10n
  14   * @author Bermi Ferrer <bermi a.t akelos c.om>
  15   * @copyright Copyright (c) 2002-2006, Akelos Media, S.L. http://www.akelos.org
  16   * @license GNU Lesser General Public License <http://www.gnu.org/copyleft/lesser.html>
  17   */
  18  
  19  
  20  /**
  21  * Charset conversion using UTF8 mapping tables.
  22  *
  23  * Charset conversion using 4 different methods: pure PHP
  24  * conversion or one of these PHP extensions: iconv, recode and
  25  * mbstring.
  26  *
  27  * Supported charsets are:
  28  * ASCII, ISO 8859-1, ISO 8859-2, ISO 8859-3, ISO 8859-4, ISO
  29  * 8859-5, ISO 8859-6, ISO 8859-7, ISO 8859-8, ISO 8859-9, ISO
  30  * 8859-10, ISO 8859-11, ISO 8859-13, ISO 8859-14, ISO 8859-15,
  31  * ISO 8859-16, CP437, CP737, CP850, CP852, CP855, CP857,
  32  * CP858, CP860, CP861, CP863, CP865, CP866, CP869,
  33  * Windows-1250, Windows-1251, Windows-1252, Windows-1253,
  34  * Windows-1254, Windows-1255, Windows-1256, Windows-1257,
  35  * Windows-1258, KOI8-R, KOI8-U, ISCII, VISCII, Big5, HKSCS,
  36  * GB2312, GB18030, Shift-JIS, EUC
  37  *
  38  * More information about charsets at
  39  * http://en.wikipedia.org/wiki/Character_encoding
  40  *
  41  * @author Bermi Ferrer <bermi@akelos.org>
  42  * @copyright Copyright (c) 2002-2005, Akelos Media, S.L. http://www.akelos.org
  43  * @license GNU Lesser General Public License <http://www.gnu.org/copyleft/lesser.html>
  44  * @since 0.1
  45  * @version $Revision 0.1 $
  46  */
  47  class AkCharset
  48  {
  49  
  50  
  51      // ------ CLASS ATTRIBUTES ------ //
  52  
  53  
  54  
  55      // ---- Public attributes ---- //
  56  
  57  
  58      /**
  59      * Allow charset recoding.
  60      *
  61      * @access public
  62      * @var    bool    $enableCharsetRecoding
  63      */
  64      var $enableCharsetRecoding = true;
  65  
  66      /**
  67      * Allow or disallow PHP Based charset conversion.
  68      *
  69      * @access public
  70      * @var    boolean    $usePhpRecoding
  71      */
  72      var $usePhpRecoding = true;
  73  
  74      /**
  75      * Default charset
  76      *
  77      * @access public
  78      * @var    string    $defaultCharset
  79      */
  80      var $defaultCharset = 'ISO-8859-1';
  81  
  82      /**
  83      * UTF-8 error character
  84      *
  85      * Char that will be used when no matches are found on the UTF8
  86      * mapping table
  87      *
  88      * @access public
  89      * @var    string    $utf8ErrorChar
  90      */
  91      var $utf8ErrorChar = '?';
  92  
  93  
  94      // ---- Private attributes ---- //
  95  
  96  
  97      /**
  98      * Current encoding engine
  99      *
 100      * @see GetRecodingEngine
 101      * @see SetRecodingEngine
 102      * @access private
 103      * @var    string    $_recodingEngine
 104      */
 105      var $_recodingEngine = null;
 106  
 107      /**
 108      * Extra parameters for invoking the encoding engine (useful
 109      * for iconv)
 110      *
 111      * @see GetRecodingEngineExtraParams
 112      * @see SetRecodingEngineExtraParams
 113      * @access private
 114      * @var    string    $_recodingEngineExtraParams
 115      */
 116      var $_recodingEngineExtraParams = '';
 117  
 118      /**
 119      * Holds current procesing charset.
 120      *
 121      * @see GetCurrentCharset
 122      * @access private
 123      * @var    string    $_currentCharset
 124      */
 125      var $_currentCharset = 'ISO-8859-1';
 126  
 127  
 128  
 129      // ------------------------------
 130  
 131  
 132  
 133      // ------ CLASS METHODS ------ //
 134  
 135  
 136  
 137      // ---- Getters ---- //
 138  
 139  
 140      /**
 141      * $this->_recodingEngine getter
 142      *
 143      * Use this method to get $this->_recodingEngine value
 144      *
 145      * @access public
 146      * @see set$recodingEngine
 147      * @return    string    Returns Current encoding engine value.
 148      */
 149      function GetRecodingEngine()
 150      {
 151          return $this->_recodingEngine;
 152  
 153      }// -- end of GetRecodingEngine -- //
 154  
 155      /**
 156      * $this->_recodingEngineExtraParams getter
 157      *
 158      * Use this method to get $this->_recodingEngineExtraParams
 159      * value
 160      *
 161      * @access public
 162      * @see set$recodingEngineExtraParams
 163      * @return    string    Returns Extra parameters for invoking the encoding
 164      * engine (useful for iconv) value.
 165      */
 166      function GetRecodingEngineExtraParams()
 167      {
 168          return $this->_recodingEngineExtraParams;
 169  
 170      }// -- end of GetRecodingEngineExtraParams -- //
 171  
 172      /**
 173      * $this->_currentCharset getter
 174      *
 175      * Use this method to get $this->_currentCharset value
 176      *
 177      * @access public
 178      * @see set$currentCharset
 179      * @return    string    Returns Holds current procesing charset. value.
 180      */
 181      function GetCurrentCharset()
 182      {
 183          return $this->_currentCharset;
 184  
 185      }// -- end of GetCurrentCharset -- //
 186  
 187  
 188      // ---- Setters ---- //
 189  
 190  
 191      /**
 192      * Sets the default recoding engine.
 193      *
 194      * @access public
 195      * @uses _LoadExtension
 196      * @param    string    $engine    Possible engines are:
 197      * - iconv (http://php.net/iconv)
 198      * - mbstring (http://php.net/mb_convert_encoding)
 199      * - recode (http://php.net/recode_string)
 200      * @param    string    $extra_params    Extra parameters for invoking the encoding engine
 201      * (useful for iconv)
 202      * @return    string    Name of current recoding engine
 203      */
 204      function SetRecodingEngine($engine = null, $extra_params = null)
 205      {
 206          static $memory;
 207  
 208          if(isset($memory[$engine.$extra_params])){
 209              return $memory[$engine.$extra_params];
 210          }
 211  
 212          $engines = array('iconv'=>'iconv','mbstring'=>'mb_convert_encoding','recode'=>'recode_string');
 213          $this->_recodingEngine = false;
 214          // Fix for systems with constant iconv defined. Php uses libiconv function instead
 215          if (!function_exists('iconv') && function_exists('libiconv')) {
 216              function iconv($input_encoding, $output_encoding, $string) {
 217                  return libiconv($input_encoding, $output_encoding, $string);
 218              }
 219          }
 220          if(empty($engine)){
 221              foreach ($engines as $_engine=>$function){
 222                  if(@function_exists($function)){
 223                      $this->_recodingEngine = $_engine;
 224                      break;
 225                  }elseif($this->_LoadExtension($_engine)&&function_exists($function)){
 226                      $this->_recodingEngine = $_engine;
 227                      break;
 228                  }
 229              }
 230          }elseif (isset($engines[$engine])){
 231              if(!@function_exists($engines[$engine])){
 232                  user_error(Ak::t('Could not set AkCharset::SetRecodingEngine("%engine");',array('%engine'=>$engine)),E_USER_NOTICE);
 233                  $memory[$engine.$extra_params] = false;
 234              }else{
 235                  $this->_recodingEngine = $engine;
 236              }
 237          }
 238          if(isset($extra_params)){
 239              $this->_recodingEngineExtraParams = $extra_params;
 240          }
 241          $memory[$engine.$extra_params] = $this->_recodingEngine;
 242          return $this->_recodingEngine;
 243      }// -- end of &SetRecodingEngine -- //
 244  
 245  
 246      /**
 247      * $this->_recodingEngineExtraParams setter
 248      *
 249      * Use this method to set $this->_recodingEngineExtraParams
 250      * value
 251      *
 252      * @access public
 253      * @see get$recodingEngineExtraParams
 254      * @param    string    $recoding__engine__extra__params    Extra parameters for invoking the encoding engine
 255      * (useful for iconv)
 256      * @return    bool    Returns true if $this->_recodingEngineExtraParams
 257      * has been set correctly.
 258      */
 259      function SetRecodingEngineExtraParams($recoding__engine__extra__params)
 260      {
 261          $this->_recodingEngineExtraParams = $recoding__engine__extra__params;
 262  
 263      }// -- end of SetRecodingEngineExtraParams -- //
 264  
 265  
 266      // ---- Public methods ---- //
 267  
 268  
 269      /**
 270      * Changes the charset encoding of one string to other charset.
 271      *
 272      * This function will convert a string from one charset to
 273      * another.
 274      * Unfortunately PHP has not native Unicode support, so in
 275      * order to display and handle different charsets, this
 276      * function wraps 3 non standard PHP extensions plus an
 277      * additional Pure PHP conversion utility for systems that do
 278      * not have this extensions enabled.
 279      *
 280      * @access public
 281      * @param    string    $string    String to recode
 282      * @param    string    $target_charset    Target charset. AkCharset availability may vary
 283      * depending on your system configuration.
 284      * @param    string    $origin_charset    Input string charset. AkCharset availability may
 285      * vary depending on your system configuration.
 286      * This parameter is optional if you are using
 287      * multibyte extension.
 288      * @param    string    $engine    Possible engines are:
 289      * - iconv (http://php.net/iconv)
 290      * - mbstring (http://php.net/mb_convert_encoding)
 291      * - recode (http://php.net/recode_string)
 292      * @param    string    $engine_extra_params    Extra parameters for invoking the encoding engine
 293      * (useful for iconv)
 294      * @return    void    Recoded string if possible, otherwise it will
 295      * return the string without modifications.
 296      */
 297      function RecodeString($string, $target_charset, $origin_charset = null, $engine = null, $engine_extra_params = null)
 298      {
 299          static $memory;
 300          if(!is_string($string)){
 301              return $string;
 302          }
 303          if($this->enableCharsetRecoding == false || $target_charset==$origin_charset){
 304              return $string;
 305          }
 306          if(isset($engine) || !isset($memory['engine'])){
 307              $engine = $memory['engine'] = $this->SetRecodingEngine($engine,$engine_extra_params);
 308          }else{
 309              $engine = $memory['engine'];
 310          }
 311          if(!$engine && !$this->usePhpRecoding){
 312              return $string;
 313          }
 314          $method = strlen($engine)>1 ? '_'.ucfirst($engine).'StringRecode' : '_PhpStringRecode';
 315  
 316          if(method_exists($this,$method)){
 317              return $this->$method($string, $target_charset, $origin_charset, $engine_extra_params);
 318          }else{
 319              user_error(Ak::t('Could not invoque AkCharset::%method();',array('%method'=>$method)),E_USER_NOTICE);
 320              return $string;
 321          }
 322      }// -- end of &RecodeString -- //
 323  
 324      /**
 325      * Fetch an array with UTF8 charset equivalence table.
 326      *
 327      * @access public
 328      * @uses _LoadInverseMap
 329      * @uses _GetCharset
 330      * @param    string    $charset    Desired charset
 331      * @return    mixed    Multilevel array with selected mapping:
 332      * array(
 333      * 'to_utf' => array(CHARS_VAL=>UTF_VAL),
 334      * 'from_utf' => array(UTF_VAL=>CHARS_VAL)
 335      * );
 336      *
 337      * False if mapping is not found.
 338      */
 339      function GetMapping($charset)
 340      {
 341          $charset = $this->_GetCharset($charset,false);
 342          if($charset!=false){
 343              $mapping = array();
 344              include_once(AK_LIB_DIR.DS.'AkCharset'.DS.'utf8_mappings'.DS.$charset.'.php');
 345              if(class_exists($charset)){
 346                  $mappingObject =& Ak::singleton($charset,$charset);
 347                  $mapping["to_utf"] = $mappingObject->_toUtfMap;
 348                  $mappingObject->_LoadInverseMap();
 349                  $mapping["from_utf"] = $mappingObject->_fromUtfMap;
 350  
 351                  return $mapping;
 352              }
 353          }
 354          return false;
 355      }// -- end of &GetMapping -- //
 356  
 357  
 358      // ---- Private methods ---- //
 359  
 360  
 361      /**
 362      * Tries to load required extension.
 363      *
 364      * @access private
 365      * @see SetRecodingEngine
 366      * @param    string    $extension    Extension name
 367      * @return    boolean    Returns true on success false on failure.
 368      */
 369      function _LoadExtension($extension)
 370      {
 371          static $memory;
 372          if(!isset($memory[$extension])){
 373              if (!extension_loaded($extension)) {
 374                  if(!ini_get('safe_mode')){
 375                      $prefix = (PHP_SHLIB_SUFFIX == 'dll') ? 'php_' : '';
 376                      $memory[$extension] = @dl($prefix .$extension.PHP_SHLIB_SUFFIX);
 377                  }else{
 378                      $memory[$extension] = false;
 379                  }
 380              }else{
 381                  $memory[$extension] = true;
 382              }
 383          }
 384          return $memory[$extension];
 385      }// -- end of &_LoadExtension -- //
 386  
 387      /**
 388      * AkCharset::RecodeString() iconv implementation
 389      *
 390      * @access private
 391      * @see RecodeString
 392      * @return    string    Recoded string if possible, otherwise it will
 393      * return the string without modifications.
 394      */
 395      function _IconvStringRecode($string, $target_charset, $origin_charset, $engine_extra_params=null)
 396      {
 397          if(!$this->_ConversionIsNeeded($origin_charset, $target_charset) && !$this->isUtf8($string)){
 398              return $string;
 399          }
 400  
 401          $skip_combinations = array('ISO-8859-1.UTF-8', 'UTF-8.ISO-8859-1');
 402          if(in_array($target_charset.'.'.$origin_charset, $skip_combinations)){
 403              return $this->_PhpStringRecode($string, $target_charset, $origin_charset);
 404          }
 405  
 406          $engine_extra_params = isset($engine_extra_params) ? $engine_extra_params : $this->_recodingEngineExtraParams;
 407          if(!$result = @iconv($target_charset, $origin_charset.$engine_extra_params, $string)){
 408              return $this->_PhpStringRecode($string, $target_charset, $origin_charset);
 409          }else{
 410              return $result;
 411          }
 412      }// -- end of &_IconvStringRecode -- //
 413  
 414  
 415      /**
 416      * AkCharset::RecodeString() recode_string implementation
 417      *
 418      * @access private
 419      * @see RecodeString
 420      * @return    string    Recoded string if possible, otherwise it will
 421      * return the string without modifications.
 422      */
 423      function _RecodeStringRecode($string, $target_charset, $origin_charset)
 424      {
 425          return recode_string($target_charset, '..'.$origin_charset, $string);
 426      }// -- end of &_RecodeStringRecode -- //
 427  
 428      /**
 429      * AkCharset::RecodeString() mb_convert_encoding implementation
 430      *
 431      * @access private
 432      * @see RecodeString
 433      * @return    string    Recoded string if possible, otherwise it will
 434      * return the string without modifications.
 435      */
 436      function _MbstringStringRecode($string, $target_charset, $origin_charset=null)
 437      {
 438          if(is_null($origin_charset)){
 439              $origin_charset = $string;
 440          }else{
 441              if(!$this->_ConversionIsNeeded($origin_charset, $target_charset) && !$this->isUtf8($string)){
 442                  return $string;
 443              }
 444          }
 445          $origin_charset = empty($origin_charset) ? mb_detect_encoding($string) : $origin_charset;
 446          if(!@mb_check_encoding('', $origin_charset) || !@mb_check_encoding('', $target_charset)){
 447              $result = $this->_PhpStringRecode($string, $target_charset, $origin_charset);
 448          }else{
 449              $result = mb_convert_encoding($string,$target_charset, $origin_charset);
 450          }
 451          return $result;
 452      }// -- end of &_MbstringStringRecode -- //
 453  
 454      /**
 455      * AkCharset::RecodeString() Pure PHP implementation
 456      *
 457      * @access private
 458      * @uses _Utf8StringEncode
 459      * @uses _Utf8StringDecode
 460      * @see RecodeString
 461      * @see _Utf8StringEncode
 462      * @see _Utf8StringDecode
 463      * @return    string    Recoded string if possible, otherwise it will
 464      * return the string without modifications.
 465      */
 466      function _PhpStringRecode($string, $target_charset, $origin_charset)
 467      {
 468          $target_charset = $this->_GetCharset($target_charset, false);
 469          $origin_charset = $this->_GetCharset($origin_charset, false);
 470  
 471          if((!$target_charset || !$origin_charset) || ((!$this->_ConversionIsNeeded($origin_charset, $target_charset) || !$this->usePhpRecoding) && !$this->isUtf8($string))){
 472              return $string;
 473          }
 474          if($origin_charset=='utf8'){
 475              include_once(AK_LIB_DIR.DS.'AkCharset'.DS.'utf8_mappings'.DS.$target_charset.'.php');
 476              if(class_exists($target_charset)){
 477  
 478                  $mappingObject =& Ak::singleton($target_charset, $target_charset);
 479  
 480                  if(method_exists($mappingObject,'_Utf8StringDecode')){
 481                      return $mappingObject->_Utf8StringDecode($string);
 482                  }else{
 483                      return $string;
 484                  }
 485              }else{
 486                  return $string;
 487              }
 488          }elseif($target_charset=='utf8'){
 489              include_once(AK_LIB_DIR.DS.'AkCharset'.DS.'utf8_mappings'.DS.$origin_charset.'.php');
 490              if(class_exists($origin_charset)){
 491                  $mappingObject =& Ak::singleton($origin_charset, $origin_charset);
 492                  if(method_exists($mappingObject,'_Utf8StringEncode')){
 493                      return $mappingObject->_Utf8StringEncode($string);
 494                  }else{
 495                      return $string;
 496                  }
 497              }else{
 498                  return $string;
 499              }
 500          }else{
 501              $utf8String = $this->_PhpStringRecode($string,'utf8',$origin_charset);
 502              return $this->_PhpStringRecode($utf8String,$target_charset,'utf8');
 503          }
 504      }// -- end of &_PhpStringRecode -- //
 505  
 506  
 507  
 508      /**
 509      * Checks for possibility or need of charset conversion.
 510      *
 511      * @access private
 512      * @uses _GetCharset
 513      * @param    string    $origin_charset
 514      * @param    string    $target_charset
 515      * @return    boolean
 516      */
 517      function _ConversionIsNeeded($origin_charset, $target_charset)
 518      {
 519          $target_charset = $this->_GetCharset($target_charset,false);
 520          $origin_charset = $this->_GetCharset($origin_charset,false);
 521  
 522          if(($origin_charset==$target_charset)||!$target_charset||!$origin_charset){
 523              return false;
 524          }
 525  
 526          if($origin_charset == 'utf8' || $target_charset == 'utf8'){
 527              return true;
 528          }
 529          $similar_charsets[] = array('cp1257','iso885913','iso88594');
 530          $similar_charsets[] = array('koi8u','cp1251','iso88595','koi8r');
 531  
 532          foreach ($similar_charsets as $group){
 533              if(in_array($origin_charset,$group)&&in_array($target_charset,$group)){
 534                  return true;
 535              }
 536          }
 537          return false;
 538      }// -- end of &_ConversionIsNeeded -- //
 539  
 540      /**
 541      * Filters input charset and returns a custom formated value
 542      * for class wide usage.
 543      *
 544      * @access private
 545      * @param    string    $charset    AkCharset name
 546      * @param    boolean    $set_charset    If true will set $this->defaultCharset value
 547      * @return    mixed    AkCharset internal name or FALSE if charset is not
 548      * found.
 549      */
 550      function _GetCharset($charset = null, $set_charset = true)
 551      {
 552          static $memory;
 553          if(isset($memory[$charset])){
 554              return $memory[$charset];
 555          }
 556  
 557          $procesed_charset = $charset == null ? $this->defaultCharset : $charset;
 558          $procesed_charset = str_replace(array('-','_','.',' '),'',strtolower(trim($procesed_charset)));
 559          $procesed_charset = str_replace(array('windows','ibm'),'cp',strtolower(trim($procesed_charset)));
 560          $alias_xref = array('437'=>'cp437','850'=>'cp850','852'=>'cp852','855'=>'cp855','857'=>'cp857',
 561          '860'=>'cp860','861'=>'cp861','862'=>'cp862','863'=>'cp863','865'=>'cp865','866'=>'cp866','869'=>'cp869',
 562          'ansix341968'=>'ascii','ansix341986'=>'ascii','arabic'=>'iso88596','asmo708'=>'iso88596','big5cp950'=>'big5',
 563          'cp367'=>'ascii','cp819'=>'iso88591','cpgr'=>'cp869','cpis'=>'cp861','csascii'=>'ascii','csbig5'=>'big5',
 564          'cscp855'=>'cp855','cscp857'=>'cp857','cscp860'=>'cp860','cscp861'=>'cp861','cscp863'=>'cp863','cscp864'=>'cp864',
 565          'cscp865'=>'cp865','cscp866'=>'cp866','cscp869'=>'cp869','cseuckr'=>'euckr','cseucpkdfmtjapanese'=>'eucjp',
 566          'csgb2312'=>'gb18030','csisolatin1'=>'iso88591','csisolatin2'=>'iso88592','csisolatin3'=>'iso88593',
 567          'csisolatin4'=>'iso88594','csisolatin5'=>'iso88599','csisolatinarabic'=>'iso88596',
 568          'csisolatincyrillic'=>'iso88595','csisolatingreek'=>'iso88597','csisolatinhebrew'=>'iso88598','cskoi8r'=>'koi8r',
 569          'cspc850multilingual'=>'cp850','cspc862latinhebrew'=>'cp862','cspc8codepage437'=>'cp437','cspcp852'=>'cp852',
 570          'csshiftjis'=>'shiftjis','cyrillic'=>'iso88595','ecma114'=>'iso88596','ecma118'=>'iso88597','elot928'=>'iso88597',
 571          'extendedunixcodepackedformatforjapanese'=>'eucjp','gb2312'=>'gb18030','greek'=>'iso88597','greek8'=>'iso88597',
 572          'hebrew'=>'iso88598','hkscsbig5'=>'big5hkscs','iso646irv:1991'=>'ascii','iso646us'=>'ascii',
 573          'iso885914:1998'=>'iso885914','iso88591:1987'=>'iso88591','iso88592:1987'=>'iso88592','iso88593:1988'=>'iso88593',
 574          'iso88594:1988'=>'iso88594','iso88595:1988'=>'iso88595','iso88596:1987'=>'iso88596','iso88597:1987'=>'iso88597',
 575          'iso88598:1988'=>'iso88598','iso88599:1989'=>'iso88599','isoceltic'=>'iso885914','isoir100'=>'iso88591',
 576          'isoir101'=>'iso88592','isoir109'=>'iso88593','isoir110'=>'iso88594','isoir126'=>'iso88597','isoir127'=>'iso88596',
 577          'isoir138'=>'iso88598','isoir144'=>'iso88595','isoir148'=>'iso88599','isoir166'=>'tis620','isoir179'=>'iso885913',
 578          'isoir199'=>'iso885914','isoir226'=>'iso885916','isoir6'=>'ascii','l1'=>'iso88591','l10'=>'iso885916','l2'=>'iso88592',
 579          'l3'=>'iso88593','l4'=>'iso88594','l5'=>'iso88599','l7'=>'iso885913','l8'=>'iso885914','latin1'=>'iso88591',
 580          'latin10'=>'iso885916','latin2'=>'iso88592','latin3'=>'iso88593','latin4'=>'iso88594','latin5'=>'iso88599',
 581          'latin7'=>'iso885913','latin8'=>'iso885914','mscyrl'=>'cp1251','mshebr'=>'cp1255','mskanji'=>'shiftjis',
 582          'sjis'=>'shiftjis','tcabig5'=>'big5','tis6200'=>'tis620','tis62025291'=>'tis620','tis62025330'=>'tis620',
 583          'us'=>'ascii','usascii'=>'ascii');
 584          $alias = array(
 585          'armscii8'=>'armscii_8','ascii'=>'ascii','big5hkscs'=>'big5_hkscs','utf8'=>'utf8',
 586          'big5'=>'big5','cp1046'=>'cp1046','cp1124'=>'cp1124','cp1125'=>'cp1125','cp1129'=>'cp1129',
 587          'cp1133'=>'cp1133','cp1161'=>'cp1161','cp1162'=>'cp1162','cp1163'=>'cp1163','cp1250'=>'cp1250',
 588          'cp1251'=>'cp1251','cp1252'=>'cp1252','cp1253'=>'cp1253','cp1254'=>'cp1254','cp1255'=>'cp1255',
 589          'cp1256'=>'cp1256','cp1257'=>'cp1257','cp1258'=>'cp1258','cp437'=>'cp437','cp737'=>'cp737',
 590          'cp775'=>'cp775','cp850'=>'cp850','cp852'=>'cp852','cp853'=>'cp853','cp855'=>'cp855','cp856'=>'cp856',
 591          'cp857'=>'cp857','cp858'=>'cp858','cp860'=>'cp860','cp861'=>'cp861','cp862'=>'cp862','cp863'=>'cp863',
 592          'cp864'=>'cp864','cp865'=>'cp865','cp866'=>'cp866','cp869'=>'cp869','cp874'=>'cp874','cp922'=>'cp922',
 593          'cp932'=>'cp932','cp949'=>'cp949','cp950'=>'cp950','dechanyu'=>'dec_hanyu','deckanji'=>'dec_kanji',
 594          'euccn'=>'euc_cn','eucjisx0213'=>'euc_jisx0213','eucjp'=>'euc_jp','euckr'=>'euc_kr','euctw'=>'euc_tw',
 595          'gb18030'=>'gb18030','gbk'=>'gbk','georgianacademy'=>'georgian_academy','georgianps'=>'georgian_ps',
 596          'hproman8'=>'hp_roman8','iso88591'=>'iso_8859_1','iso885910'=>'iso_8859_10','iso885913'=>'iso_8859_13',
 597          'iso885914'=>'iso_8859_14','iso885915'=>'iso_8859_15','iso885916'=>'iso_8859_16','iso88592'=>'iso_8859_2',
 598          'iso88593'=>'iso_8859_3','iso88594'=>'iso_8859_4','iso88595'=>'iso_8859_5','iso88596'=>'iso_8859_6',
 599          'iso88597'=>'iso_8859_7','iso88598'=>'iso_8859_8','iso88599'=>'iso_8859_9','isoir165'=>'iso_ir_165',
 600          'iso646cn'=>'iso646_cn','iso646jp'=>'iso646_jp','jisx0201'=>'jis_x0201','johab'=>'johab','koi8r'=>'koi8_r',
 601          'koi8ru'=>'koi8_ru','koi8t'=>'koi8_t','koi8u'=>'koi8_u','macarabic'=>'macarabic',
 602          'maccentraleurope'=>'maccentraleurope','maccroatian'=>'maccroatian','maccyrillic'=>'maccyrillic',
 603          'macgreek'=>'macgreek','machebrew'=>'machebrew','maciceland'=>'maciceland','macroman'=>'macroman',
 604          'macromania'=>'macromania','macthai'=>'macthai','macturkish'=>'macturkish','macukraine'=>'macukraine',
 605          'mulelao1'=>'mulelao_1','nextstep'=>'nextstep','riscoslatin1'=>'riscos_latin1','shiftjis'=>'shift_jis',
 606          'shiftjisx0213'=>'shift_jisx0213','tcvn'=>'tcvn','tds565'=>'tds565','tis620'=>'tis_620','viscii'=>'viscii',
 607          'iso885911'=>'iso_8859_11', 'jis0228' => 'jis_0228', 'jis0212' => 'jis_0212'
 608          );
 609          $procesed_charset = isset($alias_xref[$procesed_charset]) ? $alias_xref[$procesed_charset] : $procesed_charset;
 610          $memory[$charset] = isset($alias[$procesed_charset]) ? $alias[$procesed_charset] : false;
 611          if($set_charset){
 612              $this->_currentCharset = $memory[$charset];
 613          }
 614  
 615          return $memory[$charset];
 616      }// -- end of &_GetCharset -- //
 617  
 618      /**
 619      * Encodes given string as UTF8 text.
 620      *
 621      * Given string and charset mapping, returns input string as
 622      * UTF8 text
 623      *
 624      * @access private
 625      * @uses _CharToUtf8
 626      * @see _PhpStringRecode
 627      * @see _Utf8StringDecode
 628      * @param    string    $string    Text to be converted to UTF8
 629      * @param    array    $mapping_array    Array containing the charset mapping.
 630      * @return    string    UTF8 String
 631      */
 632      function _Utf8StringEncode($string, $mapping_array)
 633      {
 634          $chars = unpack('C*', $string);
 635          $count = count($chars);
 636          for($i=1;$i<=$count;$i++){
 637              if(!isset($mapping_array[$chars[$i]])){
 638                  continue;
 639              }else{
 640                  $char = (int)$mapping_array[$chars[$i]];
 641              }
 642              $chars[$i] = $this->_CharToUtf8($char);
 643          }
 644          return implode('',$chars);
 645      }// -- end of &_Utf8StringEncode -- //
 646  
 647      /**
 648      * Decodes data, assumed to be UTF-8 encoded given its
 649      * equivalence map.
 650      *
 651      * @access private
 652      * @uses _Utf8
 653      * @uses ToChar
 654      * @see _PhpStringRecode
 655      * @see _Utf8StringEncode
 656      * @param    string    $utf_string    UTF8 string
 657      * @param    array    $mapping_array    Mapping array
 658      * @return    string    Decoded string
 659      */
 660      function _Utf8StringDecode($utf_string, $mapping_array)
 661      {
 662          $chars = unpack('C*', $utf_string);
 663          $count = count($chars);
 664          $result = '';
 665          for ($i=1;$i<=$count;$i++){
 666              $result .= $this->_Utf8ToChar($chars,$i,$mapping_array);
 667          }
 668          return $result;
 669      }// -- end of &_Utf8StringDecode -- //
 670  
 671  
 672      /**
 673      * Converts a single character to its UTF8 representation
 674      *
 675      * @access protected
 676      * @see _Utf8StringEncode
 677      * @param    string    $char    Char to be converted
 678      * @return    string    UTF8 char
 679      */
 680      function _CharToUtf8($char)
 681      {
 682          if ($char < 0x80){
 683              $utf8_char = chr($char);
 684              // 2 bytes
 685          }else if($char<0x800){
 686              $utf8_char = (chr(0xC0 | $char>>6) . chr(0x80 | $char & 0x3F));
 687              // 3 bytes
 688          }else if($char<0x10000){
 689              $utf8_char = (chr(0xE0 | $char>>12) . chr(0x80 | $char>>6 & 0x3F) . chr(0x80 | $char & 0x3F));
 690              // 4 bytes
 691          }else if($char<0x200000){
 692              $utf8_char = (chr(0xF0 | $char>>18) . chr(0x80 | $char>>12 & 0x3F) . chr(0x80 | $char>>6 & 0x3F) . chr(0x80 | $char & 0x3F));
 693          }
 694          return $utf8_char;
 695      }// -- end of &_CharToUtf8 -- //
 696  
 697  
 698  
 699      /**
 700      * Decodes a single UTF8 char to it's representation as
 701      * specified in the mapping array
 702      *
 703      * @access private
 704      * @see _Utf8StringDecode
 705      * @param    array    $chars    Assoc array with chars to be decoded
 706      * @param    integer    &$id    Current char position
 707      * @param    array    $mapping_array    Mapping Array
 708      * @return    string    Decoded char
 709      */
 710      function _Utf8ToChar($chars, &$id, $mapping_array)
 711      {
 712          if(($chars[$id]>=240)&&($chars[$id]<=255)){
 713              $utf=(intval($chars[$id]-240)<<18)+(intval($chars[++$id]-128)<<12)+(intval($chars[++$id]-128)<<6)+(intval($chars[++$id]-128)<<0);
 714          }elseif(($chars[$id]>=224)&&($chars[$id]<=239)){
 715              $utf=(intval($chars[$id]-224)<<12)+(intval($chars[++$id]-128)<<6)+(intval($chars[++$id]-128)<<0);
 716          }elseif(($chars[$id]>=192)&&($chars[$id]<=223)){
 717              $utf=(intval($chars[$id]-192)<<6)+(intval($chars[++$id]-128)<<0);
 718          }else{
 719              $utf=$chars[$id];
 720          }
 721          if(array_key_exists($utf,$mapping_array)){
 722              return chr($mapping_array[$utf]);
 723          }else{
 724              return $this->utf8ErrorChar;
 725          }
 726      }// -- end of &_Utf8ToChar -- //
 727  
 728  
 729      function isUtf8($text = '')
 730      {
 731          // From http://w3.org/International/questions/qa-forms-utf-8.html
 732          return preg_match('%^(?:[\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$%xs', $text);
 733      }
 734  
 735      function _charsetMapFileExists($charset)
 736      {
 737          if(!file_exists(AK_LIB_DIR.DS.'AkCharset'.DS.'utf8_mappings'.DS.$charset.'.php')){
 738              trigger_error(Ak::t('Charset %charset is not supported on your current setting. Please download aditional charset maps from http://svn.akelos.org/extras/utf8_mappings/ into lib/AkActionView/utf8_mappings', array('%charset'=>$charset)), E_USER_NOTICE);
 739              return false;
 740          }
 741          return true;
 742      }
 743  }
 744  
 745  ?>


Generated: Mon Oct 27 12:43:49 2008 Cross-referenced by PHPXref 0.6