Charsets.php 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. <?php
  2. /**
  3. * MySQL charset metadata and manipulations
  4. */
  5. declare(strict_types=1);
  6. namespace PhpMyAdmin;
  7. use PhpMyAdmin\Charsets\Charset;
  8. use PhpMyAdmin\Charsets\Collation;
  9. use const SORT_STRING;
  10. use function array_keys;
  11. use function count;
  12. use function explode;
  13. use function is_string;
  14. use function ksort;
  15. /**
  16. * Class used to manage MySQL charsets
  17. */
  18. class Charsets
  19. {
  20. /**
  21. * MySQL charsets map
  22. *
  23. * @var array
  24. */
  25. public static $mysqlCharsetMap = [
  26. 'big5' => 'big5',
  27. 'cp-866' => 'cp866',
  28. 'euc-jp' => 'ujis',
  29. 'euc-kr' => 'euckr',
  30. 'gb2312' => 'gb2312',
  31. 'gbk' => 'gbk',
  32. 'iso-8859-1' => 'latin1',
  33. 'iso-8859-2' => 'latin2',
  34. 'iso-8859-7' => 'greek',
  35. 'iso-8859-8' => 'hebrew',
  36. 'iso-8859-8-i' => 'hebrew',
  37. 'iso-8859-9' => 'latin5',
  38. 'iso-8859-13' => 'latin7',
  39. 'iso-8859-15' => 'latin1',
  40. 'koi8-r' => 'koi8r',
  41. 'shift_jis' => 'sjis',
  42. 'tis-620' => 'tis620',
  43. 'utf-8' => 'utf8',
  44. 'windows-1250' => 'cp1250',
  45. 'windows-1251' => 'cp1251',
  46. 'windows-1252' => 'latin1',
  47. 'windows-1256' => 'cp1256',
  48. 'windows-1257' => 'cp1257',
  49. ];
  50. /**
  51. * The charset for the server
  52. *
  53. * @var Charset|null
  54. */
  55. private static $serverCharset = null;
  56. /** @var array<string, Charset> */
  57. private static $charsets = [];
  58. /** @var array<string, array<string, Collation>> */
  59. private static $collations = [];
  60. /**
  61. * Loads charset data from the server
  62. *
  63. * @param DatabaseInterface $dbi DatabaseInterface instance
  64. * @param bool $disableIs Disable use of INFORMATION_SCHEMA
  65. */
  66. private static function loadCharsets(DatabaseInterface $dbi, bool $disableIs): void
  67. {
  68. /* Data already loaded */
  69. if (count(self::$charsets) > 0) {
  70. return;
  71. }
  72. if ($disableIs) {
  73. $sql = 'SHOW CHARACTER SET';
  74. } else {
  75. $sql = 'SELECT `CHARACTER_SET_NAME` AS `Charset`,'
  76. . ' `DEFAULT_COLLATE_NAME` AS `Default collation`,'
  77. . ' `DESCRIPTION` AS `Description`,'
  78. . ' `MAXLEN` AS `Maxlen`'
  79. . ' FROM `information_schema`.`CHARACTER_SETS`';
  80. }
  81. $res = $dbi->query($sql);
  82. self::$charsets = [];
  83. while ($row = $dbi->fetchAssoc($res)) {
  84. self::$charsets[$row['Charset']] = Charset::fromServer($row);
  85. }
  86. $dbi->freeResult($res);
  87. ksort(self::$charsets, SORT_STRING);
  88. }
  89. /**
  90. * Loads collation data from the server
  91. *
  92. * @param DatabaseInterface $dbi DatabaseInterface instance
  93. * @param bool $disableIs Disable use of INFORMATION_SCHEMA
  94. */
  95. private static function loadCollations(DatabaseInterface $dbi, bool $disableIs): void
  96. {
  97. /* Data already loaded */
  98. if (count(self::$collations) > 0) {
  99. return;
  100. }
  101. if ($disableIs) {
  102. $sql = 'SHOW COLLATION';
  103. } else {
  104. $sql = 'SELECT `COLLATION_NAME` AS `Collation`,'
  105. . ' `CHARACTER_SET_NAME` AS `Charset`,'
  106. . ' `ID` AS `Id`,'
  107. . ' `IS_DEFAULT` AS `Default`,'
  108. . ' `IS_COMPILED` AS `Compiled`,'
  109. . ' `SORTLEN` AS `Sortlen`'
  110. . ' FROM `information_schema`.`COLLATIONS`';
  111. }
  112. $res = $dbi->query($sql);
  113. self::$collations = [];
  114. while ($row = $dbi->fetchAssoc($res)) {
  115. self::$collations[$row['Charset']][$row['Collation']] = Collation::fromServer($row);
  116. }
  117. $dbi->freeResult($res);
  118. foreach (array_keys(self::$collations) as $charset) {
  119. ksort(self::$collations[$charset], SORT_STRING);
  120. }
  121. }
  122. /**
  123. * Get current server charset
  124. *
  125. * @param DatabaseInterface $dbi DatabaseInterface instance
  126. * @param bool $disableIs Disable use of INFORMATION_SCHEMA
  127. */
  128. public static function getServerCharset(DatabaseInterface $dbi, bool $disableIs): Charset
  129. {
  130. if (self::$serverCharset !== null) {
  131. return self::$serverCharset;
  132. }
  133. self::loadCharsets($dbi, $disableIs);
  134. $serverCharset = $dbi->getVariable('character_set_server');
  135. if (! is_string($serverCharset)) {// MySQL 5.7.8 fallback, issue #15614
  136. $serverCharset = $dbi->fetchValue('SELECT @@character_set_server;');
  137. }
  138. self::$serverCharset = self::$charsets[$serverCharset] ?? null;
  139. // MySQL 8.0.11+ fallback, issue #16931
  140. if (self::$serverCharset === null && $serverCharset === 'utf8mb3') {
  141. // See: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-11.html#mysqld-8-0-11-charset
  142. // The utf8mb3 character set will be replaced by utf8mb4 in a future MySQL version.
  143. // The utf8 character set is currently an alias for utf8mb3,
  144. // but will at that point become a reference to utf8mb4.
  145. // To avoid ambiguity about the meaning of utf8,
  146. // consider specifying utf8mb4 explicitly for character set references instead of utf8.
  147. // Warning: #3719 'utf8' is currently an alias for the character set UTF8MB3 [...]
  148. return self::$charsets['utf8'];
  149. }
  150. if (self::$serverCharset === null) {// Fallback in case nothing is found
  151. return Charset::fromServer(
  152. [
  153. 'Charset' => __('Unknown'),
  154. 'Description' => __('Unknown'),
  155. ]
  156. );
  157. }
  158. return self::$serverCharset;
  159. }
  160. /**
  161. * Get all server charsets
  162. *
  163. * @param DatabaseInterface $dbi DatabaseInterface instance
  164. * @param bool $disableIs Disable use of INFORMATION_SCHEMA
  165. *
  166. * @return array
  167. */
  168. public static function getCharsets(DatabaseInterface $dbi, bool $disableIs): array
  169. {
  170. self::loadCharsets($dbi, $disableIs);
  171. return self::$charsets;
  172. }
  173. /**
  174. * Get all server collations
  175. *
  176. * @param DatabaseInterface $dbi DatabaseInterface instance
  177. * @param bool $disableIs Disable use of INFORMATION_SCHEMA
  178. *
  179. * @return array
  180. */
  181. public static function getCollations(DatabaseInterface $dbi, bool $disableIs): array
  182. {
  183. self::loadCollations($dbi, $disableIs);
  184. return self::$collations;
  185. }
  186. /**
  187. * @param DatabaseInterface $dbi DatabaseInterface instance
  188. * @param bool $disableIs Disable use of INFORMATION_SCHEMA
  189. * @param string|null $name Collation name
  190. */
  191. public static function findCollationByName(DatabaseInterface $dbi, bool $disableIs, ?string $name): ?Collation
  192. {
  193. $pieces = explode('_', (string) $name);
  194. if ($pieces === false || ! isset($pieces[0])) {
  195. return null;
  196. }
  197. $charset = $pieces[0];
  198. $collations = self::getCollations($dbi, $disableIs);
  199. return $collations[$charset][$name] ?? null;
  200. }
  201. }