ShallowParser.php 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. <?php
  2. /* vim: set shiftwidth=2 expandtab softtabstop=2: */
  3. namespace Boris;
  /**
   * The ShallowParser takes whatever is currently buffered and chunks it into individual statements.
   *
   * It does not build a syntax tree. It only tracks nesting (brackets, strings, comments, heredocs)
   * well enough to decide where one top-level statement ends and the next begins.
   */
  class ShallowParser {
    /**
     * Maps every token that opens a nested state to the token that closes it.
     *
     * The heredoc entry is only a placeholder: its real terminator depends on the heredoc label
     * and is computed by _initializeHeredoc().
     */
    private $_pairs = array(
      '('   => ')',
      '{'   => '}',
      '['   => ']',
      '"'   => '"',
      "'"   => "'",
      '//'  => "\n",
      '#'   => "\n",
      '/*'  => '*/',
      '<<<' => '_heredoc_special_case_'
    );

    /** Regex matching any opening token from $_pairs at the very start of the buffer. */
    private $_initials;

    /**
     * Builds the opening-token regex from the keys of $_pairs.
     */
    public function __construct() {
      $this->_initials = '/^(' . implode('|', array_map(array($this, 'quote'), array_keys($this->_pairs))) . ')/';
    }

    /**
     * Break the $buffer into chunks, with one for each highest-level construct possible.
     *
     * If the buffer is incomplete, returns an empty array.
     *
     * The last returned statement is prefixed with "return " when _isReturnable() accepts it.
     *
     * @param string $buffer
     *
     * @return array
     */
    public function statements($buffer) {
      $result = $this->_createResult($buffer);

      // Tried in this order on every pass; the first rule that reports success wins.
      $rules = array('_scanEscapedChar', '_scanRegion', '_scanStateEntrant', '_scanWsp', '_scanChar');

      while (strlen($result->buffer) > 0) {
        $this->_resetResult($result);

        if ($result->state === '<<<' && !$this->_initializeHeredoc($result)) {
          // "<<<" was not followed by a heredoc label. Leave the heredoc state so the rest of
          // the input is scanned as ordinary code; staying in it would never consume any
          // input and the loop would spin forever.
          array_pop($result->states);
          continue;
        }

        foreach ($rules as $method) {
          if ($this->$method($result)) {
            break;
          }
        }

        // A region (string, comment, heredoc) has no terminator yet: the input is incomplete.
        if ($result->stop) {
          break;
        }
      }

      $complete = !empty($result->statements)
        && trim($result->stmt) === ''
        && strlen($result->buffer) === 0;

      if ($complete) {
        $this->_combineStatements($result);
        $this->_prepareForDebug($result);
        return $result->statements;
      }

      // Honour the documented contract instead of implicitly returning null.
      return array();
    }

    /**
     * Escapes $token for use inside a "/"-delimited regular expression.
     *
     * @param string $token
     *
     * @return string
     */
    public function quote($token) {
      return preg_quote($token, '/');
    }

    // -- Private Methods

    /**
     * Creates the mutable scan state shared by all the _scan* rules.
     *
     * Fields: buffer (unconsumed input), stmt (statement being accumulated), state (innermost
     * open token), states (stack of open tokens), statements (finished chunks), stop (set when
     * scanning must halt because a region is unterminated).
     */
    private function _createResult($buffer) {
      $result = new \stdClass();
      $result->buffer     = (string) $buffer;
      $result->stmt       = '';
      $result->state      = null;
      $result->states     = array();
      $result->statements = array();
      $result->stop       = false;
      return $result;
    }

    /**
     * Refreshes the per-pass fields: clears the stop flag, reads the innermost state from the
     * stack (false when the stack is empty) and derives the regex that ends that state.
     */
    private function _resetResult($result) {
      $result->stop  = false;
      $result->state = end($result->states);
      $result->terminator = $result->state
        ? '/^(.*?' . preg_quote($this->_pairs[$result->state], '/') . ')/s'
        : null
      ;
    }

    /**
     * Glues chunks back together: a chunk that is a lone ";" or that does not end in ";" is
     * appended onto the chunk collected before it (or starts the list when there is none).
     */
    private function _combineStatements($result) {
      $combined = array();

      foreach ($result->statements as $scope) {
        $trimmed = trim($scope);

        if ($trimmed === ';' || substr($trimmed, -1) !== ';') {
          $combined[] = ((string) array_pop($combined)) . $scope;
        } else {
          $combined[] = $scope;
        }
      }

      $result->statements = $combined;
    }

    /**
     * Rewrites the final statement via _prepareDebugStmt().
     */
    private function _prepareForDebug($result) {
      $result->statements[] = $this->_prepareDebugStmt(array_pop($result->statements));
    }

    /**
     * Consumes the heredoc/nowdoc label that follows "<<<" and installs the matching terminator.
     *
     * Blanks or tabs between "<<<" and the label are accepted.
     *
     * @return bool false when the buffer does not start with a (possibly quoted) label
     */
    private function _initializeHeredoc($result) {
      if (!preg_match('/^[ \t]*([\'"]?)([a-z_][a-z0-9_]*)\\1/i', $result->buffer, $match)) {
        return false;
      }

      $docId = $match[2];
      $result->stmt      .= $match[0];
      $result->buffer     = substr($result->buffer, strlen($match[0]));
      // The body runs up to a line consisting of the label, optionally followed by ";".
      $result->terminator = '/^(.*?\n' . $docId . ');?\n/s';
      return true;
    }

    /**
     * Consumes leading whitespace. Whitespace that directly follows a finished statement is
     * attached to that statement; otherwise it goes into the statement being accumulated.
     */
    private function _scanWsp($result) {
      if (!preg_match('/^\s+/', $result->buffer, $match)) {
        return false;
      }

      if (!empty($result->statements) && $result->stmt === '') {
        $result->statements[] = array_pop($result->statements) . $match[0];
      } else {
        $result->stmt .= $match[0];
      }

      $result->buffer = substr($result->buffer, strlen($match[0]));
      return true;
    }

    /**
     * Inside a quoted string, consumes everything up to and including the next backslash escape,
     * provided no closing quote comes first. This keeps an escaped quote from ending the string.
     */
    private function _scanEscapedChar($result) {
      $inString = ($result->state === '"' || $result->state === "'");

      if ($inString
        && preg_match('/^[^' . $result->state . ']*?\\\\./s', $result->buffer, $match)) {
        $result->stmt  .= $match[0];
        $result->buffer = substr($result->buffer, strlen($match[0]));
        return true;
      }

      return false;
    }

    /**
     * Inside a string, comment or heredoc, consumes through the terminator and leaves the state.
     * When the terminator is not present yet, sets the stop flag so the caller gives up.
     *
     * @return bool true whenever the current state is one of those regions
     */
    private function _scanRegion($result) {
      if (!in_array($result->state, array('"', "'", '<<<', '//', '#', '/*'), true)) {
        return false;
      }

      if (preg_match($result->terminator, $result->buffer, $match)) {
        $result->stmt  .= $match[1];
        $result->buffer = substr($result->buffer, strlen($match[1]));
        array_pop($result->states);
      } else {
        $result->stop = true;
      }

      return true;
    }

    /**
     * Consumes an opening token (bracket, quote, comment or heredoc marker) and pushes it onto
     * the state stack.
     */
    private function _scanStateEntrant($result) {
      if (!preg_match($this->_initials, $result->buffer, $match)) {
        return false;
      }

      $result->stmt    .= $match[0];
      $result->buffer   = substr($result->buffer, strlen($match[0]));
      $result->states[] = $match[0];
      return true;
    }

    /**
     * Fallback rule: consumes one byte. Pops the current state when the byte closes it, and
     * finishes the statement on a top-level ";" or "}".
     *
     * A top-level "}" does not finish the statement when the text so far looks like a lambda,
     * so that the trailing ";" stays part of the same statement.
     *
     * @return bool always true
     */
    private function _scanChar($result) {
      $chr = substr($result->buffer, 0, 1);
      $result->stmt  .= $chr;
      $result->buffer = substr($result->buffer, 1);

      if ($result->state && $chr === $this->_pairs[$result->state]) {
        array_pop($result->states);
      }

      if (empty($result->states) && ($chr === ';' || $chr === '}')) {
        if ($chr === ';' || !$this->_isLambda($result->stmt)) {
          $result->statements[] = $result->stmt;
          $result->stmt = '';
        }
      }

      return true;
    }

    /**
     * Tells whether $input looks like an anonymous function, optionally assigned to something
     * and optionally followed by ";".
     *
     * @return int|false the preg_match() result, truthy on a match
     */
    private function _isLambda($input) {
      return preg_match(
        '/^([^=]*?=\s*)?function\s*\([^\)]*\)\s*(use\s*\([^\)]*\)\s*)?\s*\{.*\}\s*;?$/is',
        trim($input)
      );
    }

    /**
     * Tells whether "return " may be put in front of $input: it must end in ";", must not start
     * with "{", and must either be a lambda or not begin with one of the listed keywords.
     *
     * @return bool
     */
    private function _isReturnable($input) {
      $input = trim($input);

      if (substr($input, -1) !== ';' || substr($input, 0, 1) === '{') {
        return false;
      }

      return $this->_isLambda($input) || !preg_match(
        '/^(' .
        'echo|print|exit|die|goto|global|include|include_once|require|require_once|list|' .
        'return|do|for|foreach|while|if|function|namespace|class|interface|abstract|switch|' .
        'declare|throw|try|unset' .
        ')\b/i',
        $input
      );
    }

    /**
     * Prefixes a returnable statement with "return " unless it already starts with the keyword.
     *
     * The word boundary matters: without it, calls such as "returnFoo();" would be mistaken for
     * a return statement and left without the prefix.
     */
    private function _prepareDebugStmt($input) {
      if ($this->_isReturnable($input) && !preg_match('/^\s*return\b/i', $input)) {
        $input = sprintf('return %s', $input);
      }

      return $input;
    }
  }