User:AlefZet/crh/LanguageCrh.php/Temp
/*
*/ <?php
/**
 * Crimean Tatar (Qırımtatarca)
 * converter routines
 *
 * @package MediaWiki
 * @subpackage Language
 */

require_once( dirname(__FILE__).'/../LanguageConverter.php' );
require_once( dirname(__FILE__).'/LanguageCrh_latn.php' );

/**
 * Script converter between the 'crh', 'crh-latn' and 'crh-cyrl' variants.
 *
 * Both static replacement tables are currently empty (their contents are
 * commented out), so the only transformation this class performs itself is
 * the regex rule list applied by preConversion() for 'crh-cyrl'.
 */
class CrhConverter extends LanguageConverter {
	/** Latin => Cyrillic replacement pairs; loaded as the 'crh-cyrl' table. */
	var $mLatinToCyrillic = array(
		/*
		'İYA' => 'ИЯ', 'İya' => 'Ия', 'iya' => 'ия',
		'İYE' => 'ИЕ', 'İye' => 'Ие', 'iye' => 'ие',
		'EYE' => 'ЕЕ', 'Eye' => 'Ее', 'eye' => 'ее',
		'ÜYÜ' => 'УЮ', 'Üyü' => 'Ую', 'üyü' => 'ую',
		'YA' => 'Я', 'Ya' => 'Я', 'ya' => 'я',
		'YE' => 'Е', 'Ye' => 'Е', 'ye' => 'е',
		'YO' => 'Ё', 'Yo' => 'Ё', 'yo' => 'ё',
		'YÖ' => 'Ё', 'Yö' => 'Ё', 'yö' => 'ё',
		'YU' => 'Ю', 'Yu' => 'Ю', 'yu' => 'ю',
		'YÜ' => 'Ю', 'Yü' => 'Ю', 'yü' => 'ю',
		//'ŞÇ' => 'Щ', 'Şç' => 'Щ', 'şç' => 'щ',
		//'TS' => 'Ц', 'Ts' => 'Ц', 'ts' => 'ц',
		'a' => 'а', 'â' => 'я', 'b' => 'б', 'c' => 'дж', 'ç' => 'ч', 'd' => 'д',
		'e' => 'е', 'f' => 'ф', 'g' => 'г', 'ğ' => 'гъ', 'h' => 'х', 'i' => 'и',
		'ı' => 'ы', 'j' => 'ж', 'k' => 'к', 'l' => 'л', 'm' => 'м', 'n' => 'н',
		'ñ' => 'нъ', 'o' => 'о', 'ö' => 'ё', 'p' => 'п', 'q' => 'къ', 'r' => 'р',
		's' => 'с', 'ş' => 'ш', 't' => 'т', 'u' => 'у', 'ü' => 'ю', 'v' => 'в',
		'w' => 'в', 'x' => 'кс', 'y' => 'й', 'z' => 'з',
		'A' => 'А', 'Â' => 'Я', 'B' => 'Б', 'C' => 'Дж', 'Ç' => 'Ч', 'D' => 'Д',
		'E' => 'Е', 'F' => 'Ф', 'G' => 'Г', 'Ğ' => 'Гъ', 'H' => 'Х', 'İ' => 'И',
		'I' => 'Ы', 'J' => 'Ж', 'K' => 'К', 'L' => 'Л', 'M' => 'М', 'N' => 'Н',
		'Ñ' => 'Нъ', 'O' => 'О', 'Ö' => 'Ё', 'P' => 'П', 'Q' => 'Къ', 'R' => 'Р',
		'S' => 'С', 'Ş' => 'Ш', 'T' => 'Т', 'U' => 'У', 'Ü' => 'Ю', 'V' => 'В',
		'W' => 'В', 'X' => 'Кс', 'Y' => 'Й', 'Z' => 'З'
		*/
	);

	/** Cyrillic => Latin replacement pairs; loaded as the 'crh-latn' table. */
	var $mCyrillicToLatin = array(
		/*
		'ГЪ' => 'Ğ', 'Гъ' => 'Ğ', 'гъ' => 'ğ',
		'КЪ' => 'Q', 'Къ' => 'Q', 'къ' => 'q',
		'НЪ' => 'Ñ', 'Нъ' => 'Ñ', 'нъ' => 'ñ',
		'ДЖ' => 'C', 'Дж' => 'C', 'дж' => 'c',
		'ЪЕ' => 'YE', 'ъе' => 'ye', 'ЪЁ' => 'YO', 'ъё' => 'yo',
		'ЬЕ' => 'YE', 'ье' => 'ye', 'ЬЁ' => 'YO', 'ьё' => 'yo',
		'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', 'е' => 'e',
		'ё' => 'ö', 'ж' => 'j', 'з' => 'z', 'и' => 'i', 'й' => 'y', 'к' => 'k',
		'л' => 'l', 'м' => 'm', 'н' => 'n', 'о' => 'o', 'п' => 'p', 'р' => 'r',
		'с' => 's', 'т' => 't', 'у' => 'u', 'ф' => 'f', 'х' => 'h', 'ц' => 'ts',
		'ч' => 'ç', 'ш' => 'ş', 'щ' => 'şç', 'ъ' => '', 'ы' => 'ı', 'ь' => '',
		'э' => 'e', 'ю' => 'ü', 'я' => 'â',
		'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D', 'Е' => 'E',
		'Ё' => 'Ö', 'Ж' => 'J', 'З' => 'Z', 'И' => 'İ', 'Й' => 'Y', 'К' => 'K',
		'Л' => 'L', 'М' => 'M', 'Н' => 'N', 'О' => 'O', 'П' => 'P', 'Р' => 'R',
		'С' => 'S', 'Т' => 'T', 'У' => 'U', 'Ф' => 'F', 'Х' => 'H', 'Ц' => 'Ts',
		'Ч' => 'Ç', 'Ш' => 'Ş', 'Щ' => 'Şç', 'Ъ' => '', 'Ы' => 'I', 'Ь' => '',
		'Э' => 'É', 'Ю' => 'Ü', 'Я' => 'Â',
		*/
	);

	/** Lazily built, expanded rule list used by preConversion() for 'crh-cyrl'. */
	var $mLatinToCyrillicRules = null;

	/**
	 * Fill $this->mTables with one replacement table per variant.
	 * 'crh' gets an empty table, i.e. no table-driven conversion.
	 */
	function loadDefaultTables() {
		//require( "includes/CrhConversion.php" );
		$this->mTables = array();
		$this->mTables['crh-latn'] = $this->mCyrillicToLatin;
		$this->mTables['crh-cyrl'] = $this->mLatinToCyrillic;
		$this->mTables['crh'] = array();
	}

	/**
	 * Override of LanguageConverter::getPreferredVariant().
	 *
	 * Additional check: there should be no conversion for Talk pages, so the
	 * main language code is returned for them.
	 *
	 * @return string variant code
	 */
	function getPreferredVariant() {
		global $wgTitle;

		if ( is_object( $wgTitle ) && $wgTitle->isTalkPage() ) {
			return $this->mMainLanguageCode;
		}
		return parent::getPreferredVariant();
	}

	/**
	 * Wrapper around the parent implementation: if no variant is selected
	 * (the preferred variant is the main language code), the link name is
	 * left as it was.
	 *
	 * @param string &$link link text, possibly rewritten by the parent
	 * @param mixed  &$nt   passed through to the parent unchanged
	 */
	function findVariantLink( &$link, &$nt ) {
		$originalLink = $link;
		parent::findVariantLink( $link, $nt );
		if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
			$link = $originalLink;
		}
	}

	/**
	 * We want external link captions to be converted in variants, so the
	 * original text is returned instead of -{$text}-, except for URLs.
	 *
	 * The scheme test is anchored as a whole. The previous pattern
	 * "^https?:\/\/|ftp:\/\/|irc:\/\/" anchored only the http(s) alternative,
	 * so "ftp://" or "irc://" anywhere inside a caption counted as a URL.
	 *
	 * @param string $text
	 * @return string
	 */
	function markNoConversion( $text ) {
		if ( preg_match( '/^(?:https?|ftp|irc):\/\//', $text ) ) {
			return parent::markNoConversion( $text );
		}
		return $text;
	}

	/**
	 * An ugly wrapper for parsing Image titles (to prevent image name
	 * conversion): on a page in NS_IMAGE, text that starts with
	 * "<namespace name>:" is returned unconverted.
	 *
	 * @param string $text
	 * @param string|bool $toVariant
	 * @return string
	 */
	function autoConvert( $text, $toVariant = false ) {
		global $wgTitle;

		// $wgTitle may be unset (getPreferredVariant() guards against that too).
		if ( is_object( $wgTitle ) && $wgTitle->getNamespace() == NS_IMAGE ) {
			// Plain prefix comparison: the namespace name is data, not a regex.
			$prefix = $wgTitle->getNsText() . ':';
			if ( strncmp( $text, $prefix, strlen( $prefix ) ) === 0 ) {
				return $text;
			}
		}
		return parent::autoConvert( $text, $toVariant );
	}

	/**
	 * Category sort keys are always converted to the Latin variant.
	 *
	 * @param string $key
	 * @return string
	 */
	function convertCategoryKey( $key ) {
		return $this->autoConvert( $key, 'crh-latn' );
	}

	/**
	 * Context-sensitive conversion applied before the table-driven one.
	 *
	 * - 'crh':      placeholder, text is returned unchanged.
	 * - 'crh-latn': TODO: fill regexes (Cyrillic -> Latin); unchanged for now.
	 * - 'crh-cyrl': the ordered Latin -> Cyrillic rule list is applied.
	 *               TODO: move the rules to a different file.
	 * - any other value: text is returned unchanged (this used to fall off
	 *   the end of the function and return NULL).
	 *
	 * @param string $text
	 * @param string|bool $toVariant
	 * @return string
	 */
	function preConversion( $text, $toVariant ) {
		if ( $toVariant === 'crh-cyrl' ) {
			return $this->applyRules( $text, $this->getLatinToCyrillicRules() );
		}
		return $text;
	}

	/**
	 * Apply regex rules strictly in list order; each rule sees the output of
	 * the previous one.
	 *
	 * @param string $text
	 * @param array $rules list of array( pattern, replacement [, passes] )
	 * @return string
	 */
	private function applyRules( $text, $rules ) {
		foreach ( $rules as $rule ) {
			$passes = isset( $rule[2] ) ? $rule[2] : 1;
			for ( $pass = 0; $pass < $passes; $pass++ ) {
				$converted = preg_replace( $rule[0], $rule[1], $text );
				// preg_replace() yields NULL on a PCRE error; keep the text then.
				if ( $converted !== null ) {
					$text = $converted;
				}
			}
		}
		return $text;
	}

	/**
	 * Build (once) the ordered Latin -> Cyrillic rule list.
	 *
	 * Patterns are written as templates; "{name}" stands for the contents of
	 * a shared character class and is expanded below.
	 *
	 * Byte sequences of the Latin letters with diacritics:
	 *   A circumflex - \xc3\x82    a circumflex - \xc3\xa2
	 *   C cedilla    - \xc3\x87    c cedilla    - \xc3\xa7
	 *   G breve      - \xc4\x9e    g breve      - \xc4\x9f
	 *   I with dot   - \xc4\xb0    dotless i    - \xc4\xb1
	 *   N tilde      - \xc3\x91    n tilde      - \xc3\xb1
	 *   O umlaut     - \xc3\x96    o umlaut     - \xc3\xb6
	 *   S cedilla    - \xc5\x9e    s cedilla    - \xc5\x9f
	 *   U umlaut     - \xc3\x9c    u umlaut     - \xc3\xbc
	 *
	 * NOTE(review): the patterns run without the "u" modifier, so a multi-byte
	 * letter inside [...] contributes its individual bytes to the class rather
	 * than one character. That is kept as it was; only the section 11 classes,
	 * which could never match, were rewritten.
	 * NOTE(review): lowercase c, ğ, ñ and q have no rule here; presumably they
	 * were meant to be covered by $mLatinToCyrillic, which is commented out.
	 *
	 * @return array list of array( pattern, replacement [, passes] )
	 */
	private function getLatinToCyrillicRules() {
		if ( $this->mLatinToCyrillicRules !== null ) {
			return $this->mLatinToCyrillicRules;
		}

		$lower = 'a\xc3\xa2bc\xc3\xa7defg\xc4\x9fh\xc4\xb1ijklmn\xc3\xb1o\xc3\xb6pqrs\xc5\x9ftu\xc3\xbcvyz';
		$upper = 'A\xc3\x82BC\xc3\x87DEFG\xc4\x9eHI\xc4\xb0JKLMN\xc3\x91O\xc3\x96PQRS\xc5\x9eTU\xc3\x9cVYZ';
		$cons = 'bc\xc3\xa7dfg\xc4\x9fhjklmn\xc3\xb1pqrstvyzBC\xc3\x87DFG\xc4\x9eFHJKLMN\xc3\x91PQRSTVYZ';
		$classes = array(
			// characters that may precede a word
			'{start}' => '\s"\(\-',
			// characters that may follow a word
			'{end}' => '\s"\.\,\:)-',
			// whole lowercase / uppercase Latin alphabet
			'{lower}' => $lower,
			'{upper}' => $upper,
			// consonants allowed next to "ts"
			'{tsCons}' => 'bc\xc3\xa7dfghjklmnprs\xc5\x9ftvyzBC\xc3\x87DFGHJKLMNPRS\xc5\x9eTVYZ',
			// consonants used by the ya/ye/yo/yu rules
			'{cons}' => $cons,
			// what may follow the consonant that receives a soft sign
			'{after}' => '\s"\.\,\:\)\-aAuU' . $cons,
			// consonants that receive a soft sign (with and without p)
			'{softP}' => '\xc3\xa7nprstz\xc3\x87NPRSTZ',
			'{soft}' => '\xc3\xa7nrstz\xc3\x87NRSTZ',
		);

		$rules = array(
			// 0. mini-dictionary
			// 0.A some cases where the letter ц is used
			// ц- (word-initial)
			array( '/([{start}])ts/', '$1ц' ),
			array( '/([{start}])T[sS]/', '$1Ц' ),
			// -ц (word-final)
			array( '/ts([{end}])/', 'ц$1' ),
			array( '/T[sS]([{end}])/', 'Ц$1' ),
			// -ци-
			array( '/tsi([^z])/', 'ци$1' ),
			array( '/T[sS][i\xc4\xb0]([^zZ])/', 'ЦИ$1' ),
			// -цо-, -цу-
			array( '/ts([ou])/', 'ц$1' ),
			array( '/T[sS]([oOuU])/', 'Ц$1' ),
			// ц before a consonant
			array( '/ts([{tsCons}])/', 'ц$1' ),
			array( '/T[sS]([{tsCons}])/', 'Ц$1' ),
			// ц after a consonant
			array( '/([{tsCons}])ts/', '$1ц' ),
			array( '/([{tsCons}])T[sS]/', '$1Ц' ),

			// 1. the letters гъ, къ, нъ (capital Latin forms only)
			array( '/\xc4\x9e([{lower}])/', 'Гъ$1' ),
			array( '/\xc4\x9e([{upper}])/', 'ГЪ$1' ),
			array( '/([{upper}])\xc4\x9e/', '$1ГЪ' ),
			array( '/Q([{lower}])/', 'Къ$1' ),
			array( '/Q([{upper}])/', 'КЪ$1' ),
			array( '/([{upper}])Q/', '$1КЪ' ),
			array( '/\xc3\x91([{lower}])/', 'Нъ$1' ),
			array( '/\xc3\x91([{upper}])/', 'НЪ$1' ),
			array( '/([{upper}])\xc3\x91/', '$1НЪ' ),

			// 2. insert ь after л
			array( '/([ei\xc3\xb6\xc3\xbcE\xc4\xb0\xc3\x96\xc3\x9c])l([bc\xc3\xa7dfg\xc4\x9fhjklmn\xc3\xb1pqrstvyz\s"\.\,\:\)\-])/', '$1ль$2' ),
			array( '/([E\xc4\xb0\xc3\x96\xc3\x9c])L([bc\xc3\xa7dfg\xc4\x9fhjklmn\xc3\xb1pqrstvyzBC\xc3\x87DFG\xc4\x9eHJKLMN\xc3\x91PQRS\xc5\x9eTVYZ\s"\.\,\:\)\-])/', '$1ЛЬ$2' ),

			// 3. handle ya and ye
			// ya
			array( '/([{cons}])ya/', '$1ья' ),
			array( '/([{cons}])Y[aA]/', '$1ЬЯ' ),
			array( '/ya/', 'я' ),
			array( '/Y[aA]/', 'Я' ),
			// ye
			array( '/([{cons}])ye/', '$1ье' ),
			array( '/([{cons}])Y[eE]/', '$1ЬЕ' ),
			array( '/ye/', 'е' ),
			array( '/Y[eE]/', 'Е' ),

			// 4. place the letter э
			array( '/([ae\xc4\xb1io\xc3\xb6u\xc3\xbcаеэяAEI\xc4\xb0O\xc3\x96U\xc3\x9cАЕЭЯ\s"\(\-])e/', '$1э' ),
			array( '/([AEI\xc4\xb0O\xc3\x96U\xc3\x9cАЕЭЯ\s"\(\-])E/', '$1Э' ),

			// 5. the letters ё and ю - first pass
			// ё
			// insert soft signs after consonants
			array( '/([{start}])([yY])\xc3\xb6([{softP}])([{after}])/', '$1$2о$3ь$4' ),
			// NOTE(review): this replacement used to insert a lowercase "у" for
			// a capital Ö; it now mirrors the lowercase rule above. Confirm
			// against the orthography.
			array( '/([{start}])([yY])\xc3\x96([{softP}])([{after}])/', '$1$2О$3Ь$4' ),
			// insert soft signs before ё
			array( '/([{cons}])y[o\xc3\xb6]/', '$1ьё' ),
			array( '/([{cons}])Y[o\xc3\xb6O\xc3\x96]/', '$1ЬЁ' ),
			// handle the remaining occurrences of ё
			array( '/y[o\xc3\xb6]/', 'ё' ),
			array( '/Y[o\xc3\xb6O\xc3\x96]/', 'Ё' ),
			array( '/[yY][o\xc3\xb6O\xc3\x96]/', 'Ё' ),
			// ю
			// insert soft signs after consonants (two passes, as before)
			array( '/([{start}])([yY])\xc3\xbc([{softP}])([{after}])/', "$1$2\xc3\xbc$3ь$4", 2 ),
			array( '/([{start}])([yY])\xc3\x9c([{softP}])([{after}])/', "$1$2\xc3\x9c$3Ь$4", 2 ),
			// insert soft signs before ю
			array( '/([{cons}])y[u\xc3\xbc]/', '$1ью' ),
			array( '/([{cons}])Y[u\xc3\xbcU\xc3\x9c]/', '$1ЬЮ' ),
			// handle the remaining occurrences of ю
			array( '/y[u\xc3\xbc]/', 'ю' ),
			array( '/Y[u\xc3\xbcU\xc3\x9c]/', 'Ю' ),
			array( '/[yY][u\xc3\xbcU\xc3\x9c]/', 'Ю' ),

			// 6. handle bö-, bü- etc. in the first syllable
			// 6.1. cases where a soft sign is needed (two passes each)
			// о
			array( '/([{start}])([bcgkmp\xc5\x9fBCGKMP\xc5\x9e])\xc3\xb6([{soft}])([{after}])/', '$1$2о$3ь$4', 2 ),
			array( '/([{start}])([bcgkmp\xc5\x9fBCGKMP\xc5\x9e])\xc3\x96([{soft}])([{after}])/', '$1$2О$3Ь$4', 2 ),
			// у
			array( '/([{start}])([bcgkmp\xc5\x9fBCGKMP\xc5\x9e])\xc3\xbc([{soft}])([{after}])/', '$1$2у$3ь$4', 2 ),
			array( '/([{start}])([bcgkmp\xc5\x9fBCGKMP\xc5\x9e])\xc3\x9c([{soft}])([{after}])/', '$1$2У$3Ь$4', 2 ),
			// 6.2. cases where no soft sign is needed
			// о
			array( '/([{start}])([bcgkm\xc5\x9fBCGKM\xc5\x9e])\xc3\xb6/', '$1$2о' ),
			array( '/([{start}])([bcgkm\xc5\x9fBCGKM\xc5\x9e])\xc3\x96/', '$1$2О' ),
			// у
			array( '/([{start}])([bcgkm\xc5\x9fBCGKM\xc5\x9e])\xc3\xbc/', '$1$2у' ),
			array( '/([{start}])([bcgkm\xc5\x9fBCGKM\xc5\x9e])\xc3\x9c/', '$1$2У' ),

			// 7. handle ö and ü at the beginning of a word
			// 7.1. cases where a soft sign is needed (two passes each)
			// о
			array( '/([{start}])\xc3\xb6([{soft}])([{after}])/', '$1о$2ь$3', 2 ),
			array( '/([{start}])\xc3\x96([\xc3\xa7nrstz])([{after}])/', '$1О$2ь$3', 2 ),
			array( '/([{start}])\xc3\x96([\xc3\x87NRSTZ])([{after}])/', '$1О$2Ь$3', 2 ),
			// у
			array( '/([{start}])\xc3\xbc([{soft}])([{after}])/', '$1у$2ь$3', 2 ),
			array( '/([{start}])\xc3\x9c([\xc3\xa7nrstz])([{after}])/', '$1У$2ь$3', 2 ),
			array( '/([{start}])\xc3\x9c([\xc3\x87NRSTZ])([{after}])/', '$1У$2Ь$3', 2 ),
			// 7.2. cases where no soft sign is needed
			// о
			array( '/([{start}])\xc3\xb6/', '$1о' ),
			array( '/([{start}])\xc3\x96/', '$1О' ),
			// у
			array( '/([{start}])\xc3\xbc/', '$1у' ),
			array( '/([{start}])\xc3\x9c/', '$1У' ),
		);

		// 8. all remaining occurrences of the letters a, e, o, ö, u, ü, y
		$remainingVowels = array(
			'a' => 'а', 'A' => 'А',
			'e' => 'е', 'E' => 'Е',
			'o' => 'о', 'O' => 'О',
			"\xc3\xb6" => 'ё', "\xc3\x96" => 'Ё', // ö, Ö
			'u' => 'у', 'U' => 'У',
			"\xc3\xbc" => 'ю', "\xc3\x9c" => 'Ю', // ü, Ü
			'y' => 'й', 'Y' => 'Й',
		);
		foreach ( $remainingVowels as $latin => $cyrillic ) {
			$rules[] = array( '/' . $latin . '/', $cyrillic );
		}

		// 9. the letter c (capital form only)
		$rules[] = array( '/C([{lower}\xd0\xb0\xd0\xb5\xd1\x91\xd0\xb9\xd0\xbe\xd1\x83\xd1\x8d\xd1\x8e\xd1\x8f])/', 'Дж$1' );
		$rules[] = array( '/C([{upper}АЕЁЙОУЭЮЯ])/', 'ДЖ$1' );
		$rules[] = array( '/([{upper}АЕЁЙОУЭЮЯ])C/', '$1ДЖ' );

		// 10. all other letters: lowercase first, then capitals
		$remainingLetters = array(
			"\xc3\xa2" => 'я', // â
			'b' => 'б',
			"\xc3\xa7" => 'ч', // ç
			'd' => 'д', 'f' => 'ф', 'g' => 'г', 'h' => 'х',
			"\xc4\xb1" => 'ы', // ı
			'i' => 'и', 'j' => 'ж', 'k' => 'к', 'l' => 'л', 'm' => 'м',
			'n' => 'н', 'p' => 'п', 'r' => 'р', 's' => 'с',
			"\xc5\x9f" => 'ш', // ş
			't' => 'т', 'v' => 'в', 'z' => 'з',
			"\xc3\x82" => 'Я', // Â
			'B' => 'Б',
			"\xc3\x87" => 'Ч', // Ç
			'D' => 'Д', 'F' => 'Ф', 'G' => 'Г', 'H' => 'Х',
			'I' => 'Ы',
			"\xc4\xb0" => 'И', // İ
			'J' => 'Ж', 'K' => 'К', 'L' => 'Л', 'M' => 'М',
			'N' => 'Н', 'P' => 'П', 'R' => 'Р', 'S' => 'С',
			"\xc5\x9e" => 'Ш', // Ş
			'T' => 'Т', 'V' => 'В', 'Z' => 'З',
		);
		foreach ( $remainingLetters as $latin => $cyrillic ) {
			$rules[] = array( '/' . $latin . '/', $cyrillic );
		}

		// 11. remove the combinations кьк, льл, ньн, рьр, сьс, тьт.
		// The mixed-case rule uses (?:ь|Ь): the former byte-wise class [ьЬ]
		// matched a single byte and so never matched a two-byte soft sign.
		$doubled = array( 'к' => 'К', 'л' => 'Л', 'н' => 'Н', 'р' => 'Р', 'с' => 'С', 'т' => 'Т' );
		foreach ( $doubled as $small => $capital ) {
			$rules[] = array( '/' . $small . 'ь' . $small . '/', $small . $small );
			$rules[] = array( '/' . $capital . '(?:ь|Ь)' . $small . '/', $capital . $small );
			$rules[] = array( '/' . $capital . 'Ь' . $capital . '/', $capital . $capital );
		}

		// Expand the {name} placeholders into the final patterns.
		foreach ( $rules as $index => $rule ) {
			$rules[$index][0] = strtr( $rule[0], $classes );
		}

		$this->mLatinToCyrillicRules = $rules;
		return $rules;
	}

	/**
	 * Translate text into a variant; special case: Roman numerals are
	 * omitted from the table conversion.
	 *
	 * The text is first run through preConversion(), then split on Roman
	 * numerals; the separators are copied verbatim and the pieces between
	 * them are converted.
	 *
	 * NOTE(review): preConversion() runs on the whole text before the split,
	 * so for 'crh-cyrl' the letters of a Roman numeral may already have been
	 * rewritten by the time the split happens - confirm whether that is
	 * intended.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	function translate( $text, $toVariant ) {
		$text = $this->preConversion( $text, $toVariant );

		$breaks = '[^\w\x80-\xff]';

		// regexp for roman numbers
		$roman = 'M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})';

		$reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/';

		$pieces = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );

		// A variant without a loaded table is treated as "no replacements".
		$table = isset( $this->mTables[$toVariant] ) ? $this->mTables[$toVariant] : array();

		// The first piece goes through the variant table directly.
		$piece = array_shift( $pieces );
		$result = strtr( $piece[0], $table );
		$cursor = $piece[1] + strlen( $piece[0] );

		foreach ( $pieces as $piece ) {
			// Copy the separator (Roman numeral and its breaks) untouched.
			$result .= substr( $text, $cursor, $piece[1] - $cursor );
			$result .= parent::translate( $piece[0], $toVariant );
			$cursor = $piece[1] + strlen( $piece[0] );
		}

		return $result;
	}
}

/**
 * Crimean Tatar language class with script variants, built on the Latin
 * language class and a CrhConverter.
 */
class LanguageCrh extends LanguageCrh_latn {
	/**
	 * Create the converter for the variants 'crh', 'crh-latn' and 'crh-cyrl'
	 * and append it to the 'ArticleSaveComplete' hook list.
	 */
	function __construct() {
		global $wgHooks;
		parent::__construct();

		$variants = array( 'crh', 'crh-latn', 'crh-cyrl' );
		$variantfallbacks = array(
			'crh' => 'crh-latn',
			'crh-latn' => 'crh-latn',
			'crh-cyrl' => 'crh-cyrl'
		);

		$this->mConverter = new CrhConverter( $this, 'crh', $variants, $variantfallbacks );
		$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
	}

	/**
	 * Grammar conversion is always done on the Latin form of the word; if
	 * converting to Latin changed the word, the result is translated back
	 * to 'crh-cyrl' afterwards.
	 *
	 * @param string $word
	 * @param string $case
	 * @return string
	 */
	function convertGrammar( $word, $case ) {
		$fname = "LanguageCrh::convertGrammar";
		wfProfileIn( $fname );

		//always convert to -latn before convertGrammar
		$original = $word;
		$latin = $this->mConverter->autoConvert( $word, 'crh-latn' );
		$word = parent::convertGrammar( $latin, $case );

		//restore encoding
		if ( $original !== $latin ) {
			$word = $this->mConverter->translate( $word, 'crh-cyrl' );
		}

		wfProfileOut( $fname );
		return $word;
	}
}
?> /*
*/