Overview

Packages

  • Jyxo_Beholder
  • Jyxo_Charset
  • Jyxo_Color
  • Jyxo_Css
  • Jyxo_ErrorHandling
  • Jyxo_FirePhp
  • Jyxo_Gettext
    • Parser
  • Jyxo_Html
  • Jyxo_Input
    • Chain
    • Filter
    • Validator
  • Jyxo_Mail
    • Email
    • Parser
    • Sender
  • Jyxo_Rpc
    • Json
    • Xml
  • Jyxo_Shell
  • Jyxo_SpamFilter
  • Jyxo_Spl
  • Jyxo_String
  • Jyxo_Svn
  • Jyxo_Time
  • Jyxo_Timer
  • Jyxo_Webdav
  • Jyxo_XmlReader
  • PHP

Classes

  • Jyxo_Charset
  • Overview
  • Package
  • Class
  • Tree
  • Deprecated
  1: <?php
  2: 
  3: /**
  4:  * Jyxo PHP Library
  5:  *
  6:  * LICENSE
  7:  *
  8:  * This source file is subject to the new BSD license that is bundled
  9:  * with this package in the file license.txt.
 10:  * It is also available through the world-wide-web at this URL:
 11:  * https://github.com/jyxo/php/blob/master/license.txt
 12:  */
 13: 
 14: /**
 15:  * Base class for common charset operations.
 16:  *
 17:  * @category Jyxo
 18:  * @package Jyxo_Charset
 19:  * @copyright Copyright (c) 2005-2011 Jyxo, s.r.o.
 20:  * @license https://github.com/jyxo/php/blob/master/license.txt
 21:  * @author Jan Tichý
 22:  * @author Jaroslav Hanslík
 23:  * @author Štěpán Svoboda
 24:  */
 25: class Jyxo_Charset
 26: {
 27:     /**
 28:      * Detects charset of a given string.
 29:      *
 30:      * @param string $string String to detect
 31:      * @return string
 32:      */
 33:     public static function detect($string)
 34:     {
 35:         $charset = mb_detect_encoding($string, 'UTF-8, ISO-8859-2, ASCII, UTF-7, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP');
 36: 
 37:         // The previous function can not handle WINDOWS-1250 and returns ISO-8859-2 instead
 38:         if ('ISO-8859-2' === $charset && preg_match('~[\x7F-\x9F\xBC]~', $string)) {
 39:             $charset = 'WINDOWS-1250';
 40:         }
 41: 
 42:         return $charset;
 43:     }
 44: 
 45:     /**
 46:      * Converts a string from various charsets to UTF-8.
 47:      *
 48:      * The charset is automatically detected if not given.
 49:      *
 50:      * @param string $string String to convert
 51:      * @param string $charset Actual charset
 52:      * @return string
 53:      */
 54:     public static function convert2utf($string, $charset = '')
 55:     {
 56:         $charset = $charset ?: self::detect($string);
 57:         // Detection sometimes fails or the string may be in wrong format, so we remove invalid UTF-8 letters
 58:         return @iconv($charset, 'UTF-8//TRANSLIT//IGNORE', $string);
 59:     }
 60: 
 61:     /**
 62:      * Converts a string from UTF-8 to an identifier form.
 63:      *
 64:      * @param string $string String to convert
 65:      * @return string
 66:      */
 67:     public static function utf2ident($string)
 68:     {
 69:         // Convert to lowercase ASCII and than all non-alphanumeric characters to dashes
 70:         $ident = preg_replace('~[^a-z0-9]~', '-', strtolower(self::utf2ascii($string)));
 71:         // Remove multiple dashes and dashes on boundaries
 72:         return trim(preg_replace('~-+~', '-', $ident), '-');
 73:     }
 74: 
 75:     /**
 76:      * Converts a string from UTF-8 to ASCII.
 77:      *
 78:      * @param string $string String to convert
 79:      * @return string
 80:      */
 81:     public static function utf2ascii($string)
 82:     {
 83:         static $replace = array(
 84:             'á' => 'a', 'Á' => 'A', 'ä' => 'a', 'Ä' => 'A', 'â' => 'a', 'Â' => 'A', 'ă' => 'a', 'Ă' => 'A', 'ą' => 'a', 'Ą' => 'A',
 85:             'č' => 'c', 'Č' => 'C', 'ç' => 'c', 'Ç' => 'C', 'ć' => 'c', 'Ć' => 'C', 'ď' => 'd', 'Ď' => 'D', 'đ' => 'd', 'Đ' => 'D',
 86:             'é' => 'e', 'É' => 'E', 'ě' => 'e', 'Ě' => 'E', 'ë' => 'e', 'Ë' => 'E', 'ę' => 'e', 'Ę' => 'E', 'í' => 'i', 'Í' => 'I',
 87:             'î' => 'i', 'Î' => 'I', 'ł' => 'l', 'Ł' => 'L', 'ľ' => 'l', 'Ľ' => 'L', 'ĺ' => 'l', 'Ĺ' => 'L', 'ń' => 'n', 'Ń' => 'N',
 88:             'ň' => 'n', 'Ň' => 'N', 'ó' => 'o', 'Ó' => 'O', 'ô' => 'o', 'Ô' => 'O', 'ö' => 'o', 'Ö' => 'O', 'ő' => 'o', 'Ő' => 'O',
 89:             'o' => 'o', 'O' => 'O', 'ř' => 'r', 'Ř' => 'R', 'ŕ' => 'r', 'Ŕ' => 'R', 'š' => 's', 'Š' => 'S', 'ś' => 's', 'Ś' => 'S',
 90:             'ş' => 's', 'Ş' => 'S', 'ť' => 't', 'Ť' => 'T', 'ţ' => 't', 'Ţ' => 'T', 'ú' => 'u', 'Ú' => 'U', 'ů' => 'u', 'Ů' => 'U',
 91:             'ü' => 'u', 'Ü' => 'U', 'ű' => 'u', 'Ű' => 'U', 'ý' => 'y', 'Ý' => 'Y', 'ž' => 'z', 'Ž' => 'Z', 'ź' => 'z', 'Ź' => 'Z',
 92:             'ż' => 'z', 'Ż' => 'Z', 'ß' => 'ss', 'å' => 'a', 'Å' => 'A'
 93:         );
 94:         return strtr($string, $replace);
 95:     }
 96: 
 97:     /**
 98:      * Phonetical transcription of a Cyrillic string into ASCII.
 99:      *
100:      * @param string $string String to convert
101:      * @return string
102:      */
103:     public static function russian2ascii($string)
104:     {
105:         static $russian = array(
106:             'КВ', 'кв', 'КС', 'кс', 'А', 'а', 'Б', 'б', 'Ц', 'ц', 'Д', 'д', 'Э', 'э', 'Е', 'е', 'Ф', 'ф', 'Г', 'г', 'Х', 'х',
107:             'И', 'и', 'Й', 'й', 'К', 'к', 'Л', 'л', 'М', 'м', 'Н', 'н', 'О', 'о', 'П', 'п', 'Р', 'р', 'С', 'с', 'Т', 'т', 'У',
108:             'у', 'В', 'в', 'В', 'в', 'Ы', 'ы', 'З', 'з', 'Ч', 'ч', 'Ш', 'ш', 'Щ', 'щ', 'Ж', 'ж', 'Я', 'я', 'Ю', 'ю', 'ъ', 'ь'
109:         );
110:         static $ascii = array(
111:             'Q', 'q', 'X', 'x', 'A', 'a', 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'E', 'e', 'F', 'f', 'G', 'g', 'H', 'h', 'I',
112:             'i', 'J', 'j', 'K', 'k', 'L', 'l', 'M', 'm', 'N', 'n', 'O', 'o', 'P', 'p', 'R', 'r', 'S', 's', 'T', 't', 'U', 'u',
113:             'V', 'v', 'W', 'w', 'Y', 'y', 'Z', 'z', 'Ch', 'ch', 'Sh', 'sh', 'Sht', 'sht', 'Zh', 'zh', 'Ja', 'ja', 'Ju', 'ju'
114:         );
115:         return str_replace($russian, $ascii, $string);
116:     }
117: 
118:     /**
119:      * Converts a string from CP-1250 to ASCII.
120:      *
121:      * @param string $string String to convert
122:      * @return string
123:      */
124:     public static function win2ascii($string)
125:     {
126:         return strtr($string,
127:             "\xe1\xe4\xe8\xef\xe9\xec\xed\xbe\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\x9a\x9d\xfa\xf9\xfc\xfb\xfd\x9e"
128:             . "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xbc\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\x8a\x8d\xda\xd9\xdc\xdb\xdd\x8e",
129:             'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ'
130:         );
131:     }
132: 
133: 
134:     /**
135:      * Converts a string from ISO-8859-2 to ASCII.
136:      *
137:      * @param string $string String to convert
138:      * @return string
139:      */
140:     public static function iso2ascii($string)
141:     {
142:         return strtr($string,
143:             "\xe1\xe4\xe8\xef\xe9\xec\xed\xb5\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\xb9\xbb\xfa\xf9\xfc\xfb\xfd\xbe"
144:             . "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xa5\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\xa9\xab\xda\xd9\xdc\xdb\xdd\xae",
145:             'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ'
146:         );
147:     }
148: 
149:     /**
150:      * Transliterates or removes unknown UTF-8 characters from a string.
151:      *
152:      * @param string $string String to fix
153:      * @return string
154:      */
155:     public static function fixUtf($string)
156:     {
157:         return @iconv('UTF-8', 'UTF-8//TRANSLIT//IGNORE', $string);
158:     }
159: 
160: }
161: 
Jyxo PHP Library API documentation generated by ApiGen 2.3.0