Utf8.php 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. <?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP 5.1.6 or newer
  6. *
  7. * @package CodeIgniter
  8. * @author EllisLab Dev Team
  9. * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc.
  10. * @copyright Copyright (c) 2014 - 2015, British Columbia Institute of Technology (http://bcit.ca/)
  11. * @license http://codeigniter.com/user_guide/license.html
  12. * @link http://codeigniter.com
  13. * @since Version 2.0
  14. * @filesource
  15. */
  16. // ------------------------------------------------------------------------
  17. /**
  18. * Utf8 Class
  19. *
  20. * Provides support for UTF-8 environments
  21. *
  22. * @package CodeIgniter
  23. * @subpackage Libraries
  24. * @category UTF-8
  25. * @author EllisLab Dev Team
  26. * @link http://codeigniter.com/user_guide/libraries/utf8.html
  27. */
  28. class CI_Utf8 {
  29. /**
  30. * Constructor
  31. *
  32. * Determines if UTF-8 support is to be enabled
  33. *
  34. */
  35. function __construct()
  36. {
  37. log_message('debug', "Utf8 Class Initialized");
  38. global $CFG;
  39. if (
  40. preg_match('/./u', 'é') === 1 // PCRE must support UTF-8
  41. AND function_exists('iconv') // iconv must be installed
  42. AND ini_get('mbstring.func_overload') != 1 // Multibyte string function overloading cannot be enabled
  43. AND $CFG->item('charset') == 'UTF-8' // Application charset must be UTF-8
  44. )
  45. {
  46. log_message('debug', "UTF-8 Support Enabled");
  47. define('UTF8_ENABLED', TRUE);
  48. // set internal encoding for multibyte string functions if necessary
  49. // and set a flag so we don't have to repeatedly use extension_loaded()
  50. // or function_exists()
  51. if (extension_loaded('mbstring'))
  52. {
  53. define('MB_ENABLED', TRUE);
  54. mb_internal_encoding('UTF-8');
  55. }
  56. else
  57. {
  58. define('MB_ENABLED', FALSE);
  59. }
  60. }
  61. else
  62. {
  63. log_message('debug', "UTF-8 Support Disabled");
  64. define('UTF8_ENABLED', FALSE);
  65. }
  66. }
  67. // --------------------------------------------------------------------
  68. /**
  69. * Clean UTF-8 strings
  70. *
  71. * Ensures strings are UTF-8
  72. *
  73. * @access public
  74. * @param string
  75. * @return string
  76. */
  77. function clean_string($str)
  78. {
  79. if ($this->_is_ascii($str) === FALSE)
  80. {
  81. $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
  82. }
  83. return $str;
  84. }
  85. // --------------------------------------------------------------------
  86. /**
  87. * Remove ASCII control characters
  88. *
  89. * Removes all ASCII control characters except horizontal tabs,
  90. * line feeds, and carriage returns, as all others can cause
  91. * problems in XML
  92. *
  93. * @access public
  94. * @param string
  95. * @return string
  96. */
  97. function safe_ascii_for_xml($str)
  98. {
  99. return remove_invisible_characters($str, FALSE);
  100. }
  101. // --------------------------------------------------------------------
  102. /**
  103. * Convert to UTF-8
  104. *
  105. * Attempts to convert a string to UTF-8
  106. *
  107. * @access public
  108. * @param string
  109. * @param string - input encoding
  110. * @return string
  111. */
  112. function convert_to_utf8($str, $encoding)
  113. {
  114. if (function_exists('iconv'))
  115. {
  116. $str = @iconv($encoding, 'UTF-8', $str);
  117. }
  118. elseif (function_exists('mb_convert_encoding'))
  119. {
  120. $str = @mb_convert_encoding($str, 'UTF-8', $encoding);
  121. }
  122. else
  123. {
  124. return FALSE;
  125. }
  126. return $str;
  127. }
  128. // --------------------------------------------------------------------
  129. /**
  130. * Is ASCII?
  131. *
  132. * Tests if a string is standard 7-bit ASCII or not
  133. *
  134. * @access public
  135. * @param string
  136. * @return bool
  137. */
  138. function _is_ascii($str)
  139. {
  140. return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
  141. }
  142. // --------------------------------------------------------------------
  143. }
  144. // End Utf8 Class
  145. /* End of file Utf8.php */
  146. /* Location: ./system/core/Utf8.php */