1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10: 11: 12:
13:
14: namespace Jyxo;
15:
16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26:
/**
 * Static helpers for charset detection, conversion to UTF-8 and
 * transliteration of accented / Cyrillic text to plain ASCII.
 */
class Charset
{
    /**
     * Detects the charset of a string.
     *
     * Detection is delegated to mb_detect_encoding() with a fixed candidate list.
     * That list has no WINDOWS-1250 entry, so when the result is ISO-8859-2 and the
     * string contains any byte in the range 0x7F-0x9F or the byte 0xBC, the result
     * is reported as WINDOWS-1250 instead.
     *
     * @param string $string Input bytes
     * @return string|false Charset name, or false when mb_detect_encoding() fails
     */
    public static function detect($string)
    {
        $charset = mb_detect_encoding(
            $string,
            'UTF-8, ISO-8859-2, ASCII, UTF-7, EUC-JP, SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP'
        );

        if ($charset !== 'ISO-8859-2') {
            return $charset;
        }

        // The pattern has no "u" modifier on purpose: it inspects raw bytes.
        if (preg_match('~[\x7F-\x9F\xBC]~', $string)) {
            return 'WINDOWS-1250';
        }

        return $charset;
    }

    /**
     * Converts a string to UTF-8.
     *
     * Characters without a direct UTF-8 counterpart are transliterated, the ones
     * that cannot be transliterated are dropped (iconv //TRANSLIT//IGNORE).
     *
     * @param string $string Input string
     * @param string $charset Source charset; detected with {@see detect()} when empty
     * @return string|false Converted string, or false when iconv() fails (its warning is suppressed)
     */
    public static function convert2utf($string, $charset = '')
    {
        if (!$charset) {
            $charset = self::detect($string);
        }

        return @iconv($charset, 'UTF-8//TRANSLIT//IGNORE', $string);
    }

    /**
     * Converts a UTF-8 string to an identifier made of lowercase ASCII letters,
     * digits and single dashes (e.g. for use in URLs).
     *
     * @param string $string UTF-8 input
     * @return string
     */
    public static function utf2ident($string)
    {
        $lower = strtolower(self::utf2ascii($string));

        // Every byte that is not a lowercase letter or a digit becomes a dash ...
        $dashed = preg_replace('~[^a-z0-9]~', '-', $lower);

        // ... then runs of dashes are collapsed and leading/trailing dashes removed.
        $collapsed = preg_replace('~-+~', '-', $dashed);

        return trim($collapsed, '-');
    }

    /**
     * Replaces accented Latin letters in a UTF-8 string with their ASCII base letters.
     *
     * Characters that are not listed in the table are left untouched.
     *
     * @param string $string UTF-8 input
     * @return string
     */
    public static function utf2ascii($string)
    {
        static $replace = array(
            'á' => 'a', 'Á' => 'A', 'ä' => 'a', 'Ä' => 'A', 'â' => 'a', 'Â' => 'A', 'ă' => 'a', 'Ă' => 'A', 'ą' => 'a', 'Ą' => 'A',
            'č' => 'c', 'Č' => 'C', 'ç' => 'c', 'Ç' => 'C', 'ć' => 'c', 'Ć' => 'C', 'ď' => 'd', 'Ď' => 'D', 'đ' => 'd', 'Đ' => 'D',
            'é' => 'e', 'É' => 'E', 'ě' => 'e', 'Ě' => 'E', 'ë' => 'e', 'Ë' => 'E', 'ę' => 'e', 'Ę' => 'E', 'í' => 'i', 'Í' => 'I',
            'î' => 'i', 'Î' => 'I', 'ł' => 'l', 'Ł' => 'L', 'ľ' => 'l', 'Ľ' => 'L', 'ĺ' => 'l', 'Ĺ' => 'L', 'ń' => 'n', 'Ń' => 'N',
            'ň' => 'n', 'Ň' => 'N', 'ó' => 'o', 'Ó' => 'O', 'ô' => 'o', 'Ô' => 'O', 'ö' => 'o', 'Ö' => 'O', 'ő' => 'o', 'Ő' => 'O',
            // NOTE(review): the next pair maps each letter to itself and has no effect as written;
            // presumably an accented variant was intended - confirm before changing.
            'o' => 'o', 'O' => 'O',
            'ř' => 'r', 'Ř' => 'R', 'ŕ' => 'r', 'Ŕ' => 'R', 'š' => 's', 'Š' => 'S', 'ś' => 's', 'Ś' => 'S',
            'ş' => 's', 'Ş' => 'S', 'ť' => 't', 'Ť' => 'T', 'ţ' => 't', 'Ţ' => 'T', 'ú' => 'u', 'Ú' => 'U', 'ů' => 'u', 'Ů' => 'U',
            'ü' => 'u', 'Ü' => 'U', 'ű' => 'u', 'Ű' => 'U', 'ý' => 'y', 'Ý' => 'Y', 'ž' => 'z', 'Ž' => 'Z', 'ź' => 'z', 'Ź' => 'Z',
            'ż' => 'z', 'Ż' => 'Z', 'ß' => 'ss', 'å' => 'a', 'Å' => 'A'
        );

        return strtr($string, $replace);
    }

    /**
     * Transliterates Russian (Cyrillic) text in UTF-8 to ASCII.
     *
     * The digraphs "КВ" and "КС" become "Q" and "X", the lowercase hard and soft
     * signs are removed, "Ё"/"ё" is handled like "Е"/"е". Characters that are not
     * listed in the table are left untouched.
     *
     * @param string $string UTF-8 input
     * @return string
     */
    public static function russian2ascii($string)
    {
        // A single explicit map is used instead of two parallel lists so that each
        // pair is visible in one place. strtr() tries the longest keys first, which
        // lets the two-letter digraphs win over their single-letter parts.
        static $replace = array(
            'КВ' => 'Q', 'кв' => 'q', 'КС' => 'X', 'кс' => 'x',
            'А' => 'A', 'а' => 'a', 'Б' => 'B', 'б' => 'b', 'Ц' => 'C', 'ц' => 'c', 'Д' => 'D', 'д' => 'd',
            'Э' => 'E', 'э' => 'e', 'Е' => 'E', 'е' => 'e', 'Ё' => 'E', 'ё' => 'e', 'Ф' => 'F', 'ф' => 'f',
            'Г' => 'G', 'г' => 'g', 'Х' => 'H', 'х' => 'h', 'И' => 'I', 'и' => 'i', 'Й' => 'J', 'й' => 'j',
            'К' => 'K', 'к' => 'k', 'Л' => 'L', 'л' => 'l', 'М' => 'M', 'м' => 'm', 'Н' => 'N', 'н' => 'n',
            'О' => 'O', 'о' => 'o', 'П' => 'P', 'п' => 'p', 'Р' => 'R', 'р' => 'r', 'С' => 'S', 'с' => 's',
            'Т' => 'T', 'т' => 't', 'У' => 'U', 'у' => 'u', 'В' => 'V', 'в' => 'v', 'Ы' => 'Y', 'ы' => 'y',
            'З' => 'Z', 'з' => 'z', 'Ч' => 'Ch', 'ч' => 'ch', 'Ш' => 'Sh', 'ш' => 'sh', 'Щ' => 'Sht', 'щ' => 'sht',
            'Ж' => 'Zh', 'ж' => 'zh', 'Я' => 'Ja', 'я' => 'ja', 'Ю' => 'Ju', 'ю' => 'ju',
            // Hard and soft signs have no ASCII counterpart and are dropped.
            'ъ' => '', 'ь' => ''
        );

        return strtr($string, $replace);
    }

    /**
     * Replaces accented letters in a WINDOWS-1250 encoded string with ASCII letters.
     *
     * Works byte by byte: the n-th byte of the source list is replaced with the
     * n-th letter of the target list. Other bytes are left untouched.
     *
     * @param string $string WINDOWS-1250 input
     * @return string
     */
    public static function win2ascii($string)
    {
        $lowercase = "\xe1\xe4\xe8\xef\xe9\xec\xed\xbe\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\x9a\x9d\xfa\xf9\xfc\xfb\xfd\x9e";
        $uppercase = "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xbc\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\x8a\x8d\xda\xd9\xdc\xdb\xdd\x8e";

        return strtr(
            $string,
            $lowercase . $uppercase,
            'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ'
        );
    }

    /**
     * Replaces accented letters in an ISO-8859-2 encoded string with ASCII letters.
     *
     * Works byte by byte: the n-th byte of the source list is replaced with the
     * n-th letter of the target list. Other bytes are left untouched.
     *
     * @param string $string ISO-8859-2 input
     * @return string
     */
    public static function iso2ascii($string)
    {
        $lowercase = "\xe1\xe4\xe8\xef\xe9\xec\xed\xb5\xe5\xf2\xf3\xf6\xf5\xf4\xf8\xe0\xb9\xbb\xfa\xf9\xfc\xfb\xfd\xbe";
        $uppercase = "\xc1\xc4\xc8\xcf\xc9\xcc\xcd\xa5\xc5\xd2\xd3\xd6\xd5\xd4\xd8\xc0\xa9\xab\xda\xd9\xdc\xdb\xdd\xae";

        return strtr(
            $string,
            $lowercase . $uppercase,
            'aacdeeillnoooorrstuuuuyzAACDEEILLNOOOORRSTUUUUYZ'
        );
    }

    /**
     * Cleans up a UTF-8 string by converting it from UTF-8 to UTF-8 through iconv
     * with //TRANSLIT//IGNORE, which drops sequences iconv cannot convert.
     *
     * @param string $string UTF-8 input, possibly containing invalid sequences
     * @return string|false Cleaned string, or false when iconv() fails (its warning is suppressed)
     */
    public static function fixUtf($string)
    {
        return @iconv('UTF-8', 'UTF-8//TRANSLIT//IGNORE', $string);
    }
}
163: