Overview

Namespaces

  • Jyxo
    • Beholder
      • TestCase
    • Gettext
      • Parser
    • Input
      • Chain
      • Filter
      • Validator
    • Mail
      • Email
        • Attachment
      • Parser
      • Sender
    • Rpc
      • Json
      • Xml
    • Shell
    • Spl
    • Svn
    • Time
    • Webdav
  • PHP

Classes

  • Charset
  • Color
  • Css
  • ErrorHandler
  • ErrorMail
  • FirePhp
  • Html
  • HtmlTag
  • SpamFilter
  • String
  • Timer
  • XmlReader

Exceptions

  • Exception
  • Overview
  • Namespace
  • Class
  • Tree
  • Deprecated
  1: <?php
  2: 
  3: /**
  4:  * Jyxo PHP Library
  5:  *
  6:  * LICENSE
  7:  *
  8:  * This source file is subject to the new BSD license that is bundled
  9:  * with this package in the file license.txt.
 10:  * It is also available through the world-wide-web at this URL:
 11:  * https://github.com/jyxo/php/blob/master/license.txt
 12:  */
 13: 
 14: namespace Jyxo;
 15: 
 16: /**
 17:  * Base class for common charset operations.
 18:  *
 19:  * @category Jyxo
 20:  * @package Jyxo\Charset
 21:  * @copyright Copyright (c) 2005-2011 Jyxo, s.r.o.
 22:  * @license https://github.com/jyxo/php/blob/master/license.txt
 23:  * @author Jan Tichý
 24:  * @author Jaroslav Hanslík
 25:  * @author Štěpán Svoboda
 26:  */
 27: class Charset
 28: {
 29:     /**
 30:      * Detects charset of a given string.
 31:      *
 32:      * @param string $string String to detect
 33:      * @return string
 34:      */
 35:     public static function detect($string)
 36:     {
 37:         $charset = mb_detect_encoding($string, 'UTF-8, ISO-8859-2, ASCII, UTF-7, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP');
 38: 
 39:         // The previous function can not handle WINDOWS-1250 and returns ISO-8859-2 instead
 40:         if ('ISO-8859-2' === $charset && preg_match('~[\x7F-\x9F\xBC]~', $string)) {
 41:             $charset = 'WINDOWS-1250';
 42:         }
 43: 
 44:         return $charset;
 45:     }
 46: 
 47:     /**
 48:      * Converts a string from various charsets to UTF-8.
 49:      *
 50:      * The charset is automatically detected if not given.
 51:      *
 52:      * @param string $string String to convert
 53:      * @param string $charset Actual charset
 54:      * @return string
 55:      */
 56:     public static function convert2utf($string, $charset = '')
 57:     {
 58:         $charset = $charset ?: self::detect($string);
 59:         // Detection sometimes fails or the string may be in wrong format, so we remove invalid UTF-8 letters
 60:         return @iconv($charset, 'UTF-8//TRANSLIT//IGNORE', $string);
 61:     }
 62: 
 63:     /**
 64:      * Converts a string from UTF-8 to an identifier form.
 65:      *
 66:      * @param string $string String to convert
 67:      * @return string
 68:      */
 69:     public static function utf2ident($string)
 70:     {
 71:         // Convert to lowercase ASCII and than all non-alphanumeric characters to dashes
 72:         $ident = preg_replace('~[^a-z0-9]~', '-', strtolower(self::utf2ascii($string)));
 73:         // Remove multiple dashes and dashes on boundaries
 74:         return trim(preg_replace('~-+~', '-', $ident), '-');
 75:     }
 76: 
 77:     /**
 78:      * Converts a string from UTF-8 to ASCII.
 79:      *
 80:      * @param string $string String to convert
 81:      * @return string
 82:      */
 83:     public static function utf2ascii($string)
 84:     {
 85:         static $replace = array(
 86:             'á' => 'a', 'Á' => 'A', 'ä' => 'a', 'Ä' => 'A', 'â' => 'a', 'Â' => 'A', 'ă' => 'a', 'Ă' => 'A', 'ą' => 'a', 'Ą' => 'A',
 87:             'č' => 'c', 'Č' => 'C', 'ç' => 'c', 'Ç' => 'C', 'ć' => 'c', 'Ć' => 'C', 'ď' => 'd', 'Ď' => 'D', 'đ' => 'd', 'Đ' => 'D',
 88:             'é' => 'e', 'É' => 'E', 'ě' => 'e', 'Ě' => 'E', 'ë' => 'e', 'Ë' => 'E', 'ę' => 'e', 'Ę' => 'E', 'í' => 'i', 'Í' => 'I',
 89:             'î' => 'i', 'Î' => 'I', 'ł' => 'l', 'Ł' => 'L', 'ľ' => 'l', 'Ľ' => 'L', 'ĺ' => 'l', 'Ĺ' => 'L', 'ń' => 'n', 'Ń' => 'N',
 90:             'ň' => 'n', 'Ň' => 'N', 'ó' => 'o', 'Ó' => 'O', 'ô' => 'o', 'Ô' => 'O', 'ö' => 'o', 'Ö' => 'O', 'ő' => 'o', 'Ő' => 'O',
 91:             'o' => 'o', 'O' => 'O', 'ř' => 'r', 'Ř' => 'R', 'ŕ' => 'r', 'Ŕ' => 'R', 'š' => 's', 'Š' => 'S', 'ś' => 's', 'Ś' => 'S',
 92:             'ş' => 's', 'Ş' => 'S', 'ť' => 't', 'Ť' => 'T', 'ţ' => 't', 'Ţ' => 'T', 'ú' => 'u', 'Ú' => 'U', 'ů' => 'u', 'Ů' => 'U',
 93:             'ü' => 'u', 'Ü' => 'U', 'ű' => 'u', 'Ű' => 'U', 'ý' => 'y', 'Ý' => 'Y', 'ž' => 'z', 'Ž' => 'Z', 'ź' => 'z', 'Ź' => 'Z',
 94:             'ż' => 'z', 'Ż' => 'Z', 'ß' => 'ss', 'å' => 'a', 'Å' => 'A'
 95:         );
 96:         return strtr($string, $replace);
 97:     }
 98: 
 99:     /**
100:      * Phonetical transcription of a Cyrillic string into ASCII.
101:      *
102:      * @param string $string String to convert
103:      * @return string
104:      */
105:     public static function russian2ascii($string)
106:     {
107:         static $russian = array(
108:             'КВ', 'кв', 'КС', 'кс', 'А', 'а', 'Б', 'б', 'Ц', 'ц', 'Д', 'д', 'Э', 'э', 'Е', 'е', 'Ф', 'ф', 'Г', 'г', 'Х', 'х',
109:             'И', 'и', 'Й', 'й', 'К', 'к', 'Л', 'л', 'М', 'м', 'Н', 'н', 'О', 'о', 'П', 'п', 'Р', 'р', 'С', 'с', 'Т', 'т', 'У',
110:             'у', 'В', 'в', 'В', 'в', 'Ы', 'ы', 'З', 'з', 'Ч', 'ч', 'Ш', 'ш', 'Щ', 'щ', 'Ж', 'ж', 'Я', 'я', 'Ю', 'ю', 'ъ', 'ь'
111:         );
112:         static $ascii = array(
113:             'Q', 'q', 'X', 'x', 'A', 'a', 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'E', 'e', 'F', 'f', 'G', 'g', 'H', 'h', 'I',
114:             'i', 'J', 'j', 'K', 'k', 'L', 'l', 'M', 'm', 'N', 'n', 'O', 'o', 'P', 'p', 'R', 'r', 'S', 's', 'T', 't', 'U', 'u',
115:             'V', 'v', 'W', 'w', 'Y', 'y', 'Z', 'z', 'Ch', 'ch', 'Sh', 'sh', 'Sht', 'sht', 'Zh', 'zh', 'Ja', 'ja', 'Ju', 'ju'
116:         );
117:         return str_replace($russian, $ascii, $string);
118:     }
119: 
120:     /**
121:      * Converts a string from CP-1250 to ASCII.
122:      *
123:      * @param string $string String to convert
124:      * @return string
125:      */
126:     public static function win2ascii($string)
127:     {
128:         return strtr($string,
129:             "\xe1\xe4\xe8\xef\xe9\xec\xed\xbe\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\x9a\x9d\xfa\xf9\xfc\xfb\xfd\x9e"
130:             . "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xbc\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\x8a\x8d\xda\xd9\xdc\xdb\xdd\x8e",
131:             'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ'
132:         );
133:     }
134: 
135: 
136:     /**
137:      * Converts a string from ISO-8859-2 to ASCII.
138:      *
139:      * @param string $string String to convert
140:      * @return string
141:      */
142:     public static function iso2ascii($string)
143:     {
144:         return strtr($string,
145:             "\xe1\xe4\xe8\xef\xe9\xec\xed\xb5\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\xb9\xbb\xfa\xf9\xfc\xfb\xfd\xbe"
146:             . "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xa5\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\xa9\xab\xda\xd9\xdc\xdb\xdd\xae",
147:             'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ'
148:         );
149:     }
150: 
151:     /**
152:      * Transliterates or removes unknown UTF-8 characters from a string.
153:      *
154:      * @param string $string String to fix
155:      * @return string
156:      */
157:     public static function fixUtf($string)
158:     {
159:         return @iconv('UTF-8', 'UTF-8//TRANSLIT//IGNORE', $string);
160:     }
161: 
162: }
163: 
Jyxo PHP Library API documentation generated by ApiGen 2.3.0