1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11: 12:
13:
14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24:
/**
 * Static helpers for charset detection, conversion to UTF-8 and
 * transliteration of accented / Cyrillic text to plain ASCII.
 */
class Jyxo_Charset
{
    /**
     * Detects the charset of a string.
     *
     * The candidate list handed to mb_detect_encoding() does not contain
     * WINDOWS-1250, so a result of ISO-8859-2 is re-checked afterwards.
     *
     * @param string $string Input bytes
     * @return string|false Charset name, or the failure value of mb_detect_encoding()
     */
    public static function detect($string)
    {
        $candidates = 'UTF-8, ISO-8859-2, ASCII, UTF-7, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP';
        $charset = mb_detect_encoding($string, $candidates);

        // A string reported as ISO-8859-2 that contains any byte in 0x7F-0x9F
        // or the byte 0xBC is reclassified as WINDOWS-1250.
        if ('ISO-8859-2' === $charset && preg_match('~[\x7F-\x9F\xBC]~', $string)) {
            return 'WINDOWS-1250';
        }

        return $charset;
    }

    /**
     * Converts a string to UTF-8.
     *
     * @param string $string  Input string
     * @param string $charset Source charset; when empty it is detected with detect()
     * @return string|false Converted string, or false when the charset could not be
     *                      detected or iconv() failed
     */
    public static function convert2utf($string, $charset = '')
    {
        $charset = $charset ?: self::detect($string);

        // detect() may yield false; do not hand a boolean to iconv() as a charset name.
        if (false === $charset) {
            return false;
        }

        // Warnings are suppressed on purpose; failure is signalled by the false return value.
        return @iconv($charset, 'UTF-8//TRANSLIT//IGNORE', $string);
    }

    /**
     * Turns a UTF-8 string into an identifier made of a-z, 0-9 and single dashes.
     *
     * @param string $string Input string in UTF-8
     * @return string Lowercase identifier without leading or trailing dashes
     */
    public static function utf2ident($string)
    {
        $ascii = strtolower(self::utf2ascii($string));

        // Every run of bytes outside a-z / 0-9 collapses into one dash.
        $ident = preg_replace('~[^a-z0-9]+~', '-', $ascii);

        return trim($ident, '-');
    }

    /**
     * Replaces the accented characters listed in the table below with their
     * ASCII counterparts; anything not listed is left untouched.
     *
     * @param string $string Input string in UTF-8
     * @return string
     */
    public static function utf2ascii($string)
    {
        // NOTE(review): this table used to carry the identity entries 'o' => 'o' and
        // 'O' => 'O' after 'Ő'; they had no effect and were dropped. Possibly
        // 'õ' / 'Õ' were intended - confirm before adding them.
        static $map = array(
            'á' => 'a', 'Á' => 'A', 'ä' => 'a', 'Ä' => 'A', 'â' => 'a', 'Â' => 'A', 'ă' => 'a', 'Ă' => 'A', 'ą' => 'a', 'Ą' => 'A',
            'č' => 'c', 'Č' => 'C', 'ç' => 'c', 'Ç' => 'C', 'ć' => 'c', 'Ć' => 'C', 'ď' => 'd', 'Ď' => 'D', 'đ' => 'd', 'Đ' => 'D',
            'é' => 'e', 'É' => 'E', 'ě' => 'e', 'Ě' => 'E', 'ë' => 'e', 'Ë' => 'E', 'ę' => 'e', 'Ę' => 'E', 'í' => 'i', 'Í' => 'I',
            'î' => 'i', 'Î' => 'I', 'ł' => 'l', 'Ł' => 'L', 'ľ' => 'l', 'Ľ' => 'L', 'ĺ' => 'l', 'Ĺ' => 'L', 'ń' => 'n', 'Ń' => 'N',
            'ň' => 'n', 'Ň' => 'N', 'ó' => 'o', 'Ó' => 'O', 'ô' => 'o', 'Ô' => 'O', 'ö' => 'o', 'Ö' => 'O', 'ő' => 'o', 'Ő' => 'O',
            'ř' => 'r', 'Ř' => 'R', 'ŕ' => 'r', 'Ŕ' => 'R', 'š' => 's', 'Š' => 'S', 'ś' => 's', 'Ś' => 'S',
            'ş' => 's', 'Ş' => 'S', 'ť' => 't', 'Ť' => 'T', 'ţ' => 't', 'Ţ' => 'T', 'ú' => 'u', 'Ú' => 'U', 'ů' => 'u', 'Ů' => 'U',
            'ü' => 'u', 'Ü' => 'U', 'ű' => 'u', 'Ű' => 'U', 'ý' => 'y', 'Ý' => 'Y', 'ž' => 'z', 'Ž' => 'Z', 'ź' => 'z', 'Ź' => 'Z',
            'ż' => 'z', 'Ż' => 'Z', 'ß' => 'ss', 'å' => 'a', 'Å' => 'A',
        );

        return strtr($string, $map);
    }

    /**
     * Transliterates Cyrillic letters to ASCII using the table below.
     *
     * @param string $string Input string in UTF-8
     * @return string
     */
    public static function russian2ascii($string)
    {
        // Ordered search => replacement pairs. str_replace() applies them one after
        // another, so the two-letter combinations must stay ahead of the single
        // letters they start with. The hard and soft signs are removed.
        static $map = array(
            'КВ' => 'Q', 'кв' => 'q', 'КС' => 'X', 'кс' => 'x',
            'А' => 'A', 'а' => 'a', 'Б' => 'B', 'б' => 'b', 'Ц' => 'C', 'ц' => 'c', 'Д' => 'D', 'д' => 'd',
            'Э' => 'E', 'э' => 'e', 'Е' => 'E', 'е' => 'e', 'Ф' => 'F', 'ф' => 'f', 'Г' => 'G', 'г' => 'g',
            'Х' => 'H', 'х' => 'h', 'И' => 'I', 'и' => 'i', 'Й' => 'J', 'й' => 'j', 'К' => 'K', 'к' => 'k',
            'Л' => 'L', 'л' => 'l', 'М' => 'M', 'м' => 'm', 'Н' => 'N', 'н' => 'n', 'О' => 'O', 'о' => 'o',
            'П' => 'P', 'п' => 'p', 'Р' => 'R', 'р' => 'r', 'С' => 'S', 'с' => 's', 'Т' => 'T', 'т' => 't',
            'У' => 'U', 'у' => 'u', 'В' => 'V', 'в' => 'v', 'Ы' => 'Y', 'ы' => 'y', 'З' => 'Z', 'з' => 'z',
            'Ч' => 'Ch', 'ч' => 'ch', 'Ш' => 'Sh', 'ш' => 'sh', 'Щ' => 'Sht', 'щ' => 'sht',
            'Ж' => 'Zh', 'ж' => 'zh', 'Я' => 'Ja', 'я' => 'ja', 'Ю' => 'Ju', 'ю' => 'ju',
            'ъ' => '', 'ь' => '',
        );

        return str_replace(array_keys($map), array_values($map), $string);
    }

    /**
     * Replaces the single-byte accented characters listed below with ASCII letters.
     * The method name indicates the input is expected in WINDOWS-1250.
     *
     * @param string $string Input string
     * @return string
     */
    public static function win2ascii($string)
    {
        // Byte N of $from is replaced by byte N of $to (lowercase half, then uppercase half).
        $from = "\xe1\xe4\xe8\xef\xe9\xec\xed\xbe\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\x9a\x9d\xfa\xf9\xfc\xfb\xfd\x9e"
            . "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xbc\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\x8a\x8d\xda\xd9\xdc\xdb\xdd\x8e";
        $to = 'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ';

        return strtr($string, $from, $to);
    }

    /**
     * Replaces the single-byte accented characters listed below with ASCII letters.
     * The method name indicates the input is expected in ISO-8859-2.
     *
     * @param string $string Input string
     * @return string
     */
    public static function iso2ascii($string)
    {
        // Byte N of $from is replaced by byte N of $to (lowercase half, then uppercase half).
        $from = "\xe1\xe4\xe8\xef\xe9\xec\xed\xb5\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\xb9\xbb\xfa\xf9\xfc\xfb\xfd\xbe"
            . "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xa5\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\xa9\xab\xda\xd9\xdc\xdb\xdd\xae";
        $to = 'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ';

        return strtr($string, $from, $to);
    }

    /**
     * Re-encodes a string from UTF-8 to UTF-8 through iconv() with the
     * TRANSLIT and IGNORE flags.
     *
     * @param string $string Input string
     * @return string|false Result of iconv(); false when iconv() fails
     */
    public static function fixUtf($string)
    {
        // Warnings are suppressed on purpose; failure is signalled by the false return value.
        return @iconv('UTF-8', 'UTF-8//TRANSLIT//IGNORE', $string);
    }

}
161: