Encoding.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. <?php
  2. declare(strict_types=1);
  3. namespace PhpMyAdmin;
  4. use function array_intersect;
  5. use function array_map;
  6. use function explode;
  7. use function fclose;
  8. use function feof;
  9. use function fgets;
  10. use function fopen;
  11. use function function_exists;
  12. use function fwrite;
  13. use function recode_string;
  14. use function mb_convert_encoding;
  15. use function mb_convert_kana;
  16. use function mb_detect_encoding;
  17. use function mb_list_encodings;
  18. use function tempnam;
  19. use function unlink;
  20. use function iconv;
  /**
   * Encoding conversion helper class.
   *
   * Selects one of the iconv, mbstring or recode extensions as conversion
   * engine and exposes string conversion through a static API, together with
   * helpers for converting Japanese (Kanji) encoded strings and files.
   */
  class Encoding
  {
      /**
       * None encoding conversion engine
       */
      public const ENGINE_NONE = 0;

      /**
       * iconv encoding conversion engine
       */
      public const ENGINE_ICONV = 1;

      /**
       * recode encoding conversion engine
       */
      public const ENGINE_RECODE = 2;

      /**
       * mbstring encoding conversion engine
       */
      public const ENGINE_MB = 3;

      /**
       * Chosen encoding engine (one of the ENGINE_* constants), or null as
       * long as no engine has been selected.
       *
       * @var int|null
       */
      private static $engine = null;

      /**
       * Map of conversion engine configurations, keyed by configuration name.
       *
       * Each entry contains:
       *
       * - function to detect
       * - engine constant
       * - extension name to warn when missing
       *
       * The detection function of the "none" entry is never probed, see
       * initEngine().
       *
       * @var array
       */
      private static $enginemap = [
          'iconv' => [
              'iconv',
              self::ENGINE_ICONV,
              'iconv',
          ],
          'recode' => [
              'recode_string',
              self::ENGINE_RECODE,
              'recode',
          ],
          'mb' => [
              'mb_convert_encoding',
              self::ENGINE_MB,
              'mbstring',
          ],
          'none' => [
              'isset',
              self::ENGINE_NONE,
              '',
          ],
      ];

      /**
       * Order of automatic detection of engines
       *
       * @var array
       */
      private static $engineorder = [
          'iconv',
          'mb',
          'recode',
      ];

      /**
       * Kanji encodings list (comma separated, in detection order)
       *
       * @var string
       */
      private static $kanjiEncodings = 'ASCII,SJIS,EUC-JP,JIS';

      /**
       * Initializes encoding engine detecting available backends.
       *
       * The engine named by $GLOBALS['cfg']['RecodingEngine'] is used when it
       * is known and usable. Otherwise the engines are probed in the order
       * given by self::$engineorder, and ENGINE_NONE is the last resort.
       */
      public static function initEngine(): void
      {
          $configured = $GLOBALS['cfg']['RecodingEngine'] ?? 'auto';

          /* Use user configuration */
          if (isset(self::$enginemap[$configured])) {
              [$function, $engineId, $extension] = self::$enginemap[$configured];

              // "none" requires no extension. It must be accepted without
              // probing: its detection entry names isset, a language construct
              // that function_exists() never reports, so probing it would warn
              // about a missing extension with an empty name and then
              // autodetect an engine the user explicitly disabled.
              if ($engineId === self::ENGINE_NONE || function_exists($function)) {
                  self::$engine = $engineId;

                  return;
              }

              Core::warnMissingExtension($extension);
          }

          /* Autodetection */
          foreach (self::$engineorder as $candidate) {
              if (function_exists(self::$enginemap[$candidate][0])) {
                  self::$engine = self::$enginemap[$candidate][1];

                  return;
              }
          }

          /* Fallback to none conversion */
          self::$engine = self::ENGINE_NONE;
      }

      /**
       * Setter for engine. Use with caution, mostly useful for testing.
       *
       * @param int $engine Engine encoding (one of the ENGINE_* constants)
       */
      public static function setEngine(int $engine): void
      {
          self::$engine = $engine;
      }

      /**
       * Checks whether there is any charset conversion supported
       *
       * Initializes the engine on first use.
       */
      public static function isSupported(): bool
      {
          if (self::$engine === null) {
              self::initEngine();
          }

          return self::$engine !== self::ENGINE_NONE;
      }

      /**
       * Converts encoding of text according to parameters with detected
       * conversion function.
       *
       * The text is returned unchanged when both charsets are identical, when
       * no conversion engine is available, or when the engine reports a
       * failure.
       *
       * @param string $src_charset  source charset
       * @param string $dest_charset target charset
       * @param string $what         what to convert
       *
       * @return string converted text
       *
       * @access public
       */
      public static function convertString(
          string $src_charset,
          string $dest_charset,
          string $what
      ): string {
          if ($src_charset === $dest_charset) {
              return $what;
          }

          if (self::$engine === null) {
              self::initEngine();
          }

          switch (self::$engine) {
              case self::ENGINE_RECODE:
                  $converted = recode_string(
                      $src_charset . '..' . $dest_charset,
                      $what
                  );
                  break;
              case self::ENGINE_ICONV:
                  $converted = iconv(
                      $src_charset,
                      $dest_charset .
                      ($GLOBALS['cfg']['IconvExtraParams'] ?? ''),
                      $what
                  );
                  break;
              case self::ENGINE_MB:
                  $converted = mb_convert_encoding(
                      $what,
                      $dest_charset,
                      $src_charset
                  );
                  break;
              default:
                  return $what;
          }

          // The conversion functions signal failure by returning false, which
          // must not leak through the declared string return type.
          return $converted === false ? $what : $converted;
      }

      /**
       * Detects whether Kanji encoding is available
       *
       * This is the case when the current language ($GLOBALS['lang']) is 'ja'.
       */
      public static function canConvertKanji(): bool
      {
          return ($GLOBALS['lang'] ?? null) === 'ja';
      }

      /**
       * Getter for Kanji encodings. Use with caution, mostly useful for testing.
       *
       * @return string comma separated Kanji encodings list
       */
      public static function getKanjiEncodings(): string
      {
          return self::$kanjiEncodings;
      }

      /**
       * Setter for Kanji encodings. Use with caution, mostly useful for testing.
       *
       * @param string $value Kanji encodings list
       */
      public static function setKanjiEncodings(string $value): void
      {
          self::$kanjiEncodings = $value;
      }

      /**
       * Reverses SJIS and EUC-JP position in the encoding codes list
       *
       * The list is reset to one of the two built-in orders: SJIS first when
       * the second entry currently is EUC-JP, EUC-JP first otherwise.
       */
      public static function kanjiChangeOrder(): void
      {
          $parts = explode(',', self::$kanjiEncodings);

          // A list set through setKanjiEncodings() may hold fewer than two
          // entries, so the second one is read defensively.
          self::$kanjiEncodings = ($parts[1] ?? '') === 'EUC-JP'
              ? 'ASCII,SJIS,EUC-JP,JIS'
              : 'ASCII,EUC-JP,SJIS,JIS';
      }

      /**
       * Kanji string encoding convert
       *
       * The source encoding is detected among the Kanji encodings list and
       * assumed to be utf-8 when detection fails.
       *
       * @param string $str  the string to convert
       * @param string $enc  the destination encoding code
       * @param string $kana set 'kana' convert to JIS-X208-kana
       *
       * @return string the converted string
       */
      public static function kanjiStrConv(string $str, string $enc, string $kana): string
      {
          if ($enc === '' && $kana === '') {
              return $str;
          }

          $string_encoding = mb_detect_encoding($str, self::$kanjiEncodings);
          if ($string_encoding === false) {
              $string_encoding = 'utf-8';
          }

          if ($kana === 'kana') {
              $str = mb_convert_kana($str, 'KV', $string_encoding);
          }

          if ($enc === '' || $string_encoding === $enc) {
              return $str;
          }

          $converted = mb_convert_encoding($str, $enc, $string_encoding);

          // Keep the declared string return type when the conversion fails.
          return $converted === false ? $str : $converted;
      }

      /**
       * Kanji file encoding convert
       *
       * Writes a converted copy of the file into the upload temporary
       * directory and removes the original file. When the temporary file
       * cannot be created or one of the files cannot be opened, nothing is
       * converted, the original file is kept and its name is returned.
       *
       * @param string $file the name of the file to convert
       * @param string $enc  the destination encoding code
       * @param string $kana set 'kana' convert to JIS-X208-kana
       *
       * @return string the name of the converted file
       */
      public static function kanjiFileConv(string $file, string $enc, string $kana): string
      {
          if ($enc === '' && $kana === '') {
              return $file;
          }

          $tmpfname = (string) tempnam($GLOBALS['PMA_Config']->getUploadTempDir(), $enc);
          if ($tmpfname === '') {
              return $file;
          }

          $fps = fopen($file, 'r');
          if ($fps === false) {
              unlink($tmpfname);

              return $file;
          }

          $fpd = fopen($tmpfname, 'wb');
          if ($fpd === false) {
              fclose($fps);
              unlink($tmpfname);

              return $file;
          }

          self::kanjiChangeOrder();
          try {
              // Loop on the result of fgets(): feof() only turns true after a
              // read has hit the end of the file, so testing it up front would
              // hand a final false to kanjiStrConv().
              while (($line = fgets($fps, 4096)) !== false) {
                  fwrite($fpd, self::kanjiStrConv($line, $enc, $kana));
              }
          } finally {
              // Restore the detection order and release both handles even
              // when the conversion throws.
              self::kanjiChangeOrder();
              fclose($fps);
              fclose($fpd);
          }

          unlink($file);

          return $tmpfname;
      }

      /**
       * Defines radio form fields to switch between encoding modes
       *
       * @return string HTML code for the radio controls
       */
      public static function kanjiEncodingForm(): string
      {
          $template = new Template();

          return $template->render('encoding/kanji_encoding_form');
      }

      /**
       * Lists available encodings.
       *
       * Only the mbstring engine can enumerate its encodings; for it the
       * configured charsets are narrowed down to the supported ones. Every
       * other engine yields $GLOBALS['cfg']['AvailableCharsets'] as is.
       *
       * @return array
       */
      public static function listEncodings(): array
      {
          if (self::$engine === null) {
              self::initEngine();
          }

          $available = $GLOBALS['cfg']['AvailableCharsets'] ?? [];

          /* Most engines do not support listing */
          if (self::$engine !== self::ENGINE_MB) {
              return $available;
          }

          return array_intersect(
              array_map('strtolower', mb_list_encodings()),
              $available
          );
      }
  }