240f1bfb6fab89bc8df2379952032478892cbaf4d708bcf290e2ef01e5e2959840be0cef4959873ff6b508f191b287969527dd2081bb492064bf5dd19f26d6 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. import { lst } from "./misc.js"
  2. // BIDI HELPERS
  /**
   * Calls `f` for each bidi section of `order` that intersects the range
   * `from`..`to`.
   *
   * `f` receives `(sectionFrom, sectionTo, dir, index)`, where the section
   * bounds are clipped to the requested range, `dir` is "rtl" for sections
   * whose level is 1 and "ltr" for every other level, and `index` is the
   * section's position in `order`.
   *
   * @param {Array<{from: number, to: number, level: number}>|null|false} order
   *   Bidi sections. When falsy, the whole range is reported as a single
   *   "ltr" section with index 0 and the callback's result is returned.
   * @param {number} from Start of the range.
   * @param {number} to End of the range.
   * @param {Function} f Callback invoked per section.
   * @returns {*} The result of `f` when `order` is falsy, otherwise undefined.
   */
  export function iterateBidiSections(order, from, to, f) {
    if (!order) return f(from, to, "ltr", 0)
    let found = false
    for (let i = 0; i < order.length; ++i) {
      let part = order[i]
      // A section matches when it overlaps the range, or when the range is
      // empty and sits exactly at the section's end.
      let overlaps = part.from < to && part.to > from
      let emptyAtEnd = from == to && part.to == from
      if (overlaps || emptyAtEnd) {
        f(Math.max(part.from, from), Math.min(part.to, to), part.level == 1 ? "rtl" : "ltr", i)
        found = true
      }
    }
    // Nothing matched: report the range as left-to-right (no index is passed).
    if (!found) f(from, to, "ltr")
  }
  // Set as a side effect of getBidiPartAt: the index of a span that touches
  // the queried position but was not chosen as the result, or null when
  // there is none.
  export let bidiOther = null

  /**
   * Finds the index of the span in `order` that contains position `ch`.
   *
   * A span that strictly contains `ch` is returned immediately. When `ch`
   * lies on a span boundary, `sticky` decides: with "before", a non-empty
   * span ending at `ch` wins; with any other value, a non-empty span starting
   * at `ch` wins. Other spans touching `ch` (including empty ones) are
   * recorded in the module-level `bidiOther`, which is reset to null at the
   * start of every call.
   *
   * @param {Array<{from: number, to: number}>} order Spans to search.
   * @param {number} ch Position to look up.
   * @param {string} sticky "before" to prefer the span ending at `ch`.
   * @returns {number|null} Index of the chosen span; falls back to
   *   `bidiOther` (possibly null) when no span was preferred.
   */
  export function getBidiPartAt(order, ch, sticky) {
    let found
    bidiOther = null
    for (let i = 0; i < order.length; ++i) {
      let cur = order[i]
      if (cur.from < ch && cur.to > ch) return i
      if (cur.to == ch) {
        if (cur.from != cur.to && sticky == "before") found = i
        else bidiOther = i
      }
      if (cur.from == ch) {
        if (cur.from != cur.to && sticky != "before") found = i
        else bidiOther = i
      }
    }
    return found != null ? found : bidiOther
  }
  33. // Bidirectional ordering algorithm
  34. // See http://unicode.org/reports/tr9/tr9-13.html for the algorithm
  35. // that this (partially) implements.
  36. // One-char codes used for character types:
  37. // L (L): Left-to-Right
  38. // R (R): Right-to-Left
  39. // r (AL): Right-to-Left Arabic
  40. // 1 (EN): European Number
  41. // + (ES): European Number Separator
  42. // % (ET): European Number Terminator
  43. // n (AN): Arabic Number
  44. // , (CS): Common Number Separator
  45. // m (NSM): Non-Spacing Mark
  46. // b (BN): Boundary Neutral
  47. // s (B): Paragraph Separator
  48. // t (S): Segment Separator
  49. // w (WS): Whitespace
  50. // N (ON): Other Neutrals
  51. // Returns false if the string is empty or (for the "ltr" direction)
  52. // contains no right-to-left characters; otherwise returns an array of
  53. // sections ({from, to, level} objects) in the order in which they occur visually.
  /**
   * Computes the visual ordering of a string, called as
   * `bidiOrdering(str, direction)`.
   *
   * `direction` is the base direction: "ltr" selects an outer type of L, any
   * other value selects R; the result is reversed only when it is "rtl".
   *
   * Returns false when `str` is empty, or when `direction` is "ltr" and the
   * string contains no character matched by `bidiRE`. Otherwise returns an
   * array of BidiSpan objects ({level, from, to}) in visual order. Spans get
   * level 0 for runs that count as left-to-right, level 1 for right-to-left
   * runs, and level 2 for number runs inside right-to-left runs.
   */
  let bidiOrdering = (function() {
    // Character types for codepoints 0 to 0xff
    let lowTypes = "bbbbbbbbbtstwsbbbbbbbbbbbbbbssstwNN%%%NNNNNN,N,N1111111111NNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNNNLLLLLLLLLLLLLLLLLLLLLLLLLLNNNNbbbbbbsbbbbbbbbbbbbbbbbbbbbbbbbbb,N%%%%NNNNLNNNNN%%11NLNNN1LNNNNNLLLLLLLLLLLLLLLLLLLLLLLNLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLN"
    // Character types for codepoints 0x600 to 0x6f9
    let arabicTypes = "nnnnnnNNr%%r,rNNmmmmmmmmmmmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnn%nnrrrmrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrmmmmmmmnNmmmmmmrrmmNmmmmrr1111111111"

    // Maps a UTF-16 code unit to its one-char type code. Anything not
    // covered by the tables or ranges below is treated as "L".
    function charType(code) {
      if (code <= 0xf7) return lowTypes.charAt(code)
      else if (0x590 <= code && code <= 0x5f4) return "R"
      else if (0x600 <= code && code <= 0x6f9) return arabicTypes.charAt(code - 0x600)
      else if (0x6ee <= code && code <= 0x8ac) return "r"
      else if (0x2000 <= code && code <= 0x200b) return "w"
      else if (code == 0x200c) return "b"
      else return "L"
    }

    // Matches any character that forces the full algorithm to run.
    let bidiRE = /[\u0590-\u05f4\u0600-\u06ff\u0700-\u08ac]/
    let isNeutral = /[stwN]/, isStrong = /[LRr]/, countsAsLeft = /[Lb1n]/, countsAsNum = /[1n]/

    // A contiguous range [from, to) of the string with a single level.
    function BidiSpan(level, from, to) {
      this.level = level
      this.from = from; this.to = to
    }

    return function(str, direction) {
      let outerType = direction == "ltr" ? "L" : "R"

      if (str.length == 0 || direction == "ltr" && !bidiRE.test(str)) return false
      let len = str.length, types = []
      for (let i = 0; i < len; ++i)
        types.push(charType(str.charCodeAt(i)))

      // W1. Examine each non-spacing mark (NSM) in the level run, and
      // change the type of the NSM to the type of the previous
      // character. If the NSM is at the start of the level run, it will
      // get the type of sor.
      for (let i = 0, prev = outerType; i < len; ++i) {
        let type = types[i]
        if (type == "m") types[i] = prev
        else prev = type
      }

      // W2. Search backwards from each instance of a European number
      // until the first strong type (R, L, AL, or sor) is found. If an
      // AL is found, change the type of the European number to Arabic
      // number.
      // W3. Change all ALs to R.
      for (let i = 0, cur = outerType; i < len; ++i) {
        let type = types[i]
        if (type == "1" && cur == "r") types[i] = "n"
        else if (isStrong.test(type)) { cur = type; if (type == "r") types[i] = "R" }
      }

      // W4. A single European separator between two European numbers
      // changes to a European number. A single common separator between
      // two numbers of the same type changes to that type.
      for (let i = 1, prev = types[0]; i < len - 1; ++i) {
        let type = types[i]
        if (type == "+" && prev == "1" && types[i+1] == "1") types[i] = "1"
        else if (type == "," && prev == types[i+1] &&
                 (prev == "1" || prev == "n")) types[i] = prev
        // Track the type as it was before this iteration's rewrite.
        prev = type
      }

      // W5. A sequence of European terminators adjacent to European
      // numbers changes to all European numbers.
      // W6. Otherwise, separators and terminators change to Other
      // Neutral.
      for (let i = 0; i < len; ++i) {
        let type = types[i]
        if (type == ",") types[i] = "N"
        else if (type == "%") {
          let end
          for (end = i + 1; end < len && types[end] == "%"; ++end) {}
          // The run is adjacent to a European number if one precedes it
          // or follows it. The preceding type must be compared against
          // "1" (the EN code); "!" is not a type code and never matches.
          let replace = (i && types[i-1] == "1") || (end < len && types[end] == "1") ? "1" : "N"
          for (let j = i; j < end; ++j) types[j] = replace
          i = end - 1
        }
      }

      // W7. Search backwards from each instance of a European number
      // until the first strong type (R, L, or sor) is found. If an L is
      // found, then change the type of the European number to L.
      for (let i = 0, cur = outerType; i < len; ++i) {
        let type = types[i]
        if (cur == "L" && type == "1") types[i] = "L"
        else if (isStrong.test(type)) cur = type
      }

      // N1. A sequence of neutrals takes the direction of the
      // surrounding strong text if the text on both sides has the same
      // direction. European and Arabic numbers act as if they were R in
      // terms of their influence on neutrals. Start-of-level-run (sor)
      // and end-of-level-run (eor) are used at level run boundaries.
      // N2. Any remaining neutrals take the embedding direction.
      for (let i = 0; i < len; ++i) {
        if (isNeutral.test(types[i])) {
          let end
          for (end = i + 1; end < len && isNeutral.test(types[end]); ++end) {}
          let before = (i ? types[i-1] : outerType) == "L"
          let after = (end < len ? types[end] : outerType) == "L"
          let replace = before == after ? (before ? "L" : "R") : outerType
          for (let j = i; j < end; ++j) types[j] = replace
          i = end - 1
        }
      }

      // Here we depart from the documented algorithm, in order to avoid
      // building up an actual levels array. Since there are only three
      // levels (0, 1, 2) in an implementation that doesn't take
      // explicit embedding into account, we can build up the order on
      // the fly, without following the level-based algorithm.
      let order = [], m
      for (let i = 0; i < len;) {
        if (countsAsLeft.test(types[i])) {
          let start = i
          for (++i; i < len && countsAsLeft.test(types[i]); ++i) {}
          order.push(new BidiSpan(0, start, i))
        } else {
          // `at` is the insertion point for pieces of this run; it only
          // advances when the base direction is "rtl".
          let pos = i, at = order.length, isRTL = direction == "rtl" ? 1 : 0
          for (++i; i < len && types[i] != "L"; ++i) {}
          for (let j = pos; j < i;) {
            if (countsAsNum.test(types[j])) {
              if (pos < j) { order.splice(at, 0, new BidiSpan(1, pos, j)); at += isRTL }
              let nstart = j
              for (++j; j < i && countsAsNum.test(types[j]); ++j) {}
              order.splice(at, 0, new BidiSpan(2, nstart, j))
              at += isRTL
              pos = j
            } else ++j
          }
          if (pos < i) order.splice(at, 0, new BidiSpan(1, pos, i))
        }
      }
      if (direction == "ltr") {
        // Split leading/trailing whitespace off a level-1 span at either
        // edge into its own level-0 span.
        if (order[0].level == 1 && (m = str.match(/^\s+/))) {
          order[0].from = m[0].length
          order.unshift(new BidiSpan(0, 0, m[0].length))
        }
        if (lst(order).level == 1 && (m = str.match(/\s+$/))) {
          lst(order).to -= m[0].length
          order.push(new BidiSpan(0, len - m[0].length, len))
        }
      }

      return direction == "rtl" ? order.reverse() : order
    }
  })()
  /**
   * Gets the bidi ordering for the given line, caching it on the line.
   *
   * The result is stored in `line.order` and reused on later calls; it is
   * recomputed only while `line.order` is null or undefined. The cached
   * value is returned regardless of the `direction` passed in.
   *
   * @param {{text: string, order: *}} line Line whose `text` is ordered and
   *   whose `order` property holds the cache.
   * @param {string} direction Base direction passed on to `bidiOrdering`.
   * @returns {false|Array} false for lines that are fully left-to-right,
   *   and an array of BidiSpan objects otherwise.
   */
  export function getOrder(line, direction) {
    let order = line.order
    if (order == null) order = line.order = bidiOrdering(line.text, direction)
    return order
  }