LexRust.cpp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839
  1. /** @file LexRust.cxx
  2. ** Lexer for Rust.
  3. **
  4. ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
  5. ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
  6. **/
  7. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  8. // The License.txt file describes the conditions under which this software may be distributed.
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <stdio.h>
  12. #include <stdarg.h>
  13. #include <assert.h>
  14. #include <ctype.h>
  15. #include <string>
  16. #include <map>
  17. #include "ILexer.h"
  18. #include "Scintilla.h"
  19. #include "SciLexer.h"
  20. #include "PropSetSimple.h"
  21. #include "WordList.h"
  22. #include "LexAccessor.h"
  23. #include "Accessor.h"
  24. #include "StyleContext.h"
  25. #include "CharacterSet.h"
  26. #include "LexerModule.h"
  27. #include "OptionSet.h"
  28. #ifdef SCI_NAMESPACE
  29. using namespace Scintilla;
  30. #endif
  /* Number of keyword lists the lexer keeps (size of the keyword WordList array). */
  static const int NUM_RUST_KEYWORD_LISTS = 7;
  /* Maximum number of identifier characters copied out for keyword lookup. */
  static const int MAX_RUST_IDENT_CHARS = 1023;
  33. static bool IsStreamCommentStyle(int style) {
  34. return style == SCE_RUST_COMMENTBLOCK ||
  35. style == SCE_RUST_COMMENTBLOCKDOC;
  36. }
  // Option values used by LexerRust; the constructor installs the defaults.
  struct OptionsRust {
      bool fold;                      // master switch checked first by Fold()
      bool foldSyntaxBased;           // fold on '{' / '}' operators
      bool foldComment;               // enables the comment based fold points
      bool foldCommentMultiline;      // fold block comments
      bool foldCommentExplicit;       // fold explicit markers
      std::string foldExplicitStart;  // user defined explicit fold start marker
      std::string foldExplicitEnd;    // user defined explicit fold end marker
      bool foldExplicitAnywhere;      // explicit markers allowed outside line comments
      bool foldCompact;               // flag blank lines with the white flag
      int foldAtElseInt;              // value of "lexer.rust.fold.at.else"
      bool foldAtElse;                // value of "fold.at.else"

      OptionsRust() :
          fold(false),
          foldSyntaxBased(true),
          foldComment(false),
          foldCommentMultiline(true),
          foldCommentExplicit(true),
          foldExplicitStart(),
          foldExplicitEnd(),
          foldExplicitAnywhere(false),
          foldCompact(true),
          foldAtElseInt(-1),
          foldAtElse(false) {
      }
  };
  64. static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
  65. "Primary keywords and identifiers",
  66. "Built in types",
  67. "Other keywords",
  68. "Keywords 4",
  69. "Keywords 5",
  70. "Keywords 6",
  71. "Keywords 7",
  72. 0,
  73. };
  74. struct OptionSetRust : public OptionSet<OptionsRust> {
  75. OptionSetRust() {
  76. DefineProperty("fold", &OptionsRust::fold);
  77. DefineProperty("fold.comment", &OptionsRust::foldComment);
  78. DefineProperty("fold.compact", &OptionsRust::foldCompact);
  79. DefineProperty("fold.at.else", &OptionsRust::foldAtElse);
  80. DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
  81. "Set this property to 0 to disable syntax based folding.");
  82. DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
  83. "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
  84. DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
  85. "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
  86. DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
  87. "The string to use for explicit fold start points, replacing the standard //{.");
  88. DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
  89. "The string to use for explicit fold end points, replacing the standard //}.");
  90. DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
  91. "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
  92. DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
  93. "This option enables Rust folding on a \"} else {\" line of an if statement.");
  94. DefineWordListSets(rustWordLists);
  95. }
  96. };
  97. class LexerRust : public ILexer {
  98. WordList keywords[NUM_RUST_KEYWORD_LISTS];
  99. OptionsRust options;
  100. OptionSetRust osRust;
  101. public:
  102. virtual ~LexerRust() {
  103. }
  104. void SCI_METHOD Release() {
  105. delete this;
  106. }
  107. int SCI_METHOD Version() const {
  108. return lvOriginal;
  109. }
  110. const char * SCI_METHOD PropertyNames() {
  111. return osRust.PropertyNames();
  112. }
  113. int SCI_METHOD PropertyType(const char *name) {
  114. return osRust.PropertyType(name);
  115. }
  116. const char * SCI_METHOD DescribeProperty(const char *name) {
  117. return osRust.DescribeProperty(name);
  118. }
  119. Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
  120. const char * SCI_METHOD DescribeWordListSets() {
  121. return osRust.DescribeWordListSets();
  122. }
  123. Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
  124. void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  125. void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  126. void * SCI_METHOD PrivateCall(int, void *) {
  127. return 0;
  128. }
  129. static ILexer *LexerFactoryRust() {
  130. return new LexerRust();
  131. }
  132. };
  133. Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
  134. if (osRust.PropertySet(&options, key, val)) {
  135. return 0;
  136. }
  137. return -1;
  138. }
  139. Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
  140. Sci_Position firstModification = -1;
  141. if (n < NUM_RUST_KEYWORD_LISTS) {
  142. WordList *wordListN = &keywords[n];
  143. WordList wlNew;
  144. wlNew.Set(wl);
  145. if (*wordListN != wlNew) {
  146. wordListN->Set(wl);
  147. firstModification = 0;
  148. }
  149. }
  150. return firstModification;
  151. }
  /* True for space, tab, carriage return and line feed. */
  static bool IsWhitespace(int c) {
      switch (c) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  155. /* This isn't quite right for Unicode identifiers */
  156. static bool IsIdentifierStart(int ch) {
  157. return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
  158. }
  159. /* This isn't quite right for Unicode identifiers */
  160. static bool IsIdentifierContinue(int ch) {
  161. return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
  162. }
  163. static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
  164. while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
  165. if (pos == styler.LineEnd(styler.GetLine(pos)))
  166. styler.SetLineState(styler.GetLine(pos), 0);
  167. pos++;
  168. }
  169. styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
  170. }
  171. static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
  172. for (Sci_Position ii = 0; ii < len; ii++)
  173. s[ii] = styler[ii + start];
  174. s[len] = '\0';
  175. }
  176. static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
  177. Sci_Position start = pos;
  178. while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
  179. pos++;
  180. if (styler.SafeGetCharAt(pos, '\0') == '!') {
  181. pos++;
  182. styler.ColourTo(pos - 1, SCE_RUST_MACRO);
  183. } else {
  184. char s[MAX_RUST_IDENT_CHARS + 1];
  185. int len = pos - start;
  186. len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
  187. GrabString(s, styler, start, len);
  188. bool keyword = false;
  189. for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
  190. if (keywords[ii].InList(s)) {
  191. styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
  192. keyword = true;
  193. break;
  194. }
  195. }
  196. if (!keyword) {
  197. styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
  198. }
  199. }
  200. }
  201. /* Scans a sequence of digits, returning true if it found any. */
  202. static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
  203. Sci_Position old_pos = pos;
  204. for (;;) {
  205. int c = styler.SafeGetCharAt(pos, '\0');
  206. if (IsADigit(c, base) || c == '_')
  207. pos++;
  208. else
  209. break;
  210. }
  211. return old_pos != pos;
  212. }
  213. /* Scans an integer and floating point literals. */
  214. static void ScanNumber(Accessor& styler, Sci_Position& pos) {
  215. int base = 10;
  216. int c = styler.SafeGetCharAt(pos, '\0');
  217. int n = styler.SafeGetCharAt(pos + 1, '\0');
  218. bool error = false;
  219. /* Scan the prefix, thus determining the base.
  220. * 10 is default if there's no prefix. */
  221. if (c == '0' && n == 'x') {
  222. pos += 2;
  223. base = 16;
  224. } else if (c == '0' && n == 'b') {
  225. pos += 2;
  226. base = 2;
  227. } else if (c == '0' && n == 'o') {
  228. pos += 2;
  229. base = 8;
  230. }
  231. /* Scan initial digits. The literal is malformed if there are none. */
  232. error |= !ScanDigits(styler, pos, base);
  233. /* See if there's an integer suffix. We mimic the Rust's lexer
  234. * and munch it even if there was an error above. */
  235. c = styler.SafeGetCharAt(pos, '\0');
  236. if (c == 'u' || c == 'i') {
  237. pos++;
  238. c = styler.SafeGetCharAt(pos, '\0');
  239. n = styler.SafeGetCharAt(pos + 1, '\0');
  240. if (c == '8' || c == 's') {
  241. pos++;
  242. } else if (c == '1' && n == '6') {
  243. pos += 2;
  244. } else if (c == '3' && n == '2') {
  245. pos += 2;
  246. } else if (c == '6' && n == '4') {
  247. pos += 2;
  248. } else {
  249. error = true;
  250. }
  251. /* See if it's a floating point literal. These literals have to be base 10.
  252. */
  253. } else if (!error) {
  254. /* If there's a period, it's a floating point literal unless it's
  255. * followed by an identifier (meaning this is a method call, e.g.
  256. * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
  257. */
  258. n = styler.SafeGetCharAt(pos + 1, '\0');
  259. if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
  260. error |= base != 10;
  261. pos++;
  262. /* It's ok to have no digits after the period. */
  263. ScanDigits(styler, pos, 10);
  264. }
  265. /* Look for the exponentiation. */
  266. c = styler.SafeGetCharAt(pos, '\0');
  267. if (c == 'e' || c == 'E') {
  268. error |= base != 10;
  269. pos++;
  270. c = styler.SafeGetCharAt(pos, '\0');
  271. if (c == '-' || c == '+')
  272. pos++;
  273. /* It is invalid to have no digits in the exponent. */
  274. error |= !ScanDigits(styler, pos, 10);
  275. }
  276. /* Scan the floating point suffix. */
  277. c = styler.SafeGetCharAt(pos, '\0');
  278. if (c == 'f') {
  279. error |= base != 10;
  280. pos++;
  281. c = styler.SafeGetCharAt(pos, '\0');
  282. n = styler.SafeGetCharAt(pos + 1, '\0');
  283. if (c == '3' && n == '2') {
  284. pos += 2;
  285. } else if (c == '6' && n == '4') {
  286. pos += 2;
  287. } else {
  288. error = true;
  289. }
  290. }
  291. }
  292. if (error)
  293. styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
  294. else
  295. styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
  296. }
  /* True when `c` is one of the single-character operator/punctuation
   * characters recognised by the lexer. */
  static bool IsOneCharOperator(int c) {
      switch (c) {
      case ';': case ',': case '(': case ')':
      case '{': case '}': case '[': case ']':
      case '@': case '#': case '~': case '+':
      case '*': case '/': case '^': case '%':
      case '.': case ':': case '!': case '<':
      case '>': case '=': case '-': case '&':
      case '|': case '$': case '?':
          return true;
      default:
          return false;
      }
  }
  /* True when the character pair (c, n) forms a two-character operator. */
  static bool IsTwoCharOperator(int c, int n) {
      switch (c) {
      case '.':
          return n == '.';
      case ':':
          return n == ':';
      case '!':
          return n == '=';
      case '<':
          return n == '<' || n == '=';
      case '>':
          return n == '>' || n == '=';
      case '=':
          return n == '=' || n == '>';
      case '-':
          return n == '>' || n == '=';
      case '&':
          return n == '&' || n == '=';
      case '|':
          return n == '|' || n == '=';
      case '+':
      case '*':
      case '/':
      case '^':
      case '%':
          return n == '=';
      default:
          return false;
      }
  }
  /* True for the three-character operators `<<=` and `>>=`. */
  static bool IsThreeCharOperator(int c, int n, int n2) {
      if (n2 != '=')
          return false;
      if (c == '<')
          return n == '<';
      if (c == '>')
          return n == '>';
      return false;
  }
  /* True when `c` is a character that may follow a backslash in a character
   * literal as a simple escape: n, r, t, backslash, single quote,
   * double quote or 0. */
  static bool IsValidCharacterEscape(int c) {
      switch (c) {
      case 'n':
      case 'r':
      case 't':
      case '\\':
      case '\'':
      case '"':
      case '0':
          return true;
      default:
          return false;
      }
  }
  /* True when `c` may follow a backslash in a string literal as a simple
   * escape: every simple character-literal escape (n, r, t, backslash,
   * single quote, double quote, 0) plus a line feed or carriage return. */
  static bool IsValidStringEscape(int c) {
      switch (c) {
      case 'n':
      case 'r':
      case 't':
      case '\\':
      case '\'':
      case '"':
      case '0':
      case '\n':
      case '\r':
          return true;
      default:
          return false;
      }
  }
  329. static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
  330. for (;;) {
  331. int c = styler.SafeGetCharAt(pos, '\0');
  332. if (!IsADigit(c, 16))
  333. break;
  334. num_digits--;
  335. pos++;
  336. if (num_digits == 0 && stop_asap)
  337. return true;
  338. }
  339. if (num_digits == 0) {
  340. return true;
  341. } else {
  342. return false;
  343. }
  344. }
  345. /* This is overly permissive for character literals in order to accept UTF-8 encoded
  346. * character literals. */
  347. static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
  348. pos++;
  349. int c = styler.SafeGetCharAt(pos, '\0');
  350. int n = styler.SafeGetCharAt(pos + 1, '\0');
  351. bool done = false;
  352. bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
  353. bool valid_char = true;
  354. bool first = true;
  355. while (!done) {
  356. switch (c) {
  357. case '\\':
  358. done = true;
  359. if (IsValidCharacterEscape(n)) {
  360. pos += 2;
  361. } else if (n == 'x') {
  362. pos += 2;
  363. valid_char = ScanNumericEscape(styler, pos, 2, false);
  364. } else if (n == 'u' && !ascii_only) {
  365. pos += 2;
  366. if (styler.SafeGetCharAt(pos, '\0') != '{') {
  367. // old-style
  368. valid_char = ScanNumericEscape(styler, pos, 4, false);
  369. } else {
  370. int n_digits = 0;
  371. while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
  372. }
  373. if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
  374. pos++;
  375. else
  376. valid_char = false;
  377. }
  378. } else if (n == 'U' && !ascii_only) {
  379. pos += 2;
  380. valid_char = ScanNumericEscape(styler, pos, 8, false);
  381. } else {
  382. valid_char = false;
  383. }
  384. break;
  385. case '\'':
  386. valid_char = !first;
  387. done = true;
  388. break;
  389. case '\t':
  390. case '\n':
  391. case '\r':
  392. case '\0':
  393. valid_char = false;
  394. done = true;
  395. break;
  396. default:
  397. if (ascii_only && !IsASCII((char)c)) {
  398. done = true;
  399. valid_char = false;
  400. } else if (!IsIdentifierContinue(c) && !first) {
  401. done = true;
  402. } else {
  403. pos++;
  404. }
  405. break;
  406. }
  407. c = styler.SafeGetCharAt(pos, '\0');
  408. n = styler.SafeGetCharAt(pos + 1, '\0');
  409. first = false;
  410. }
  411. if (styler.SafeGetCharAt(pos, '\0') == '\'') {
  412. valid_lifetime = false;
  413. } else {
  414. valid_char = false;
  415. }
  416. if (valid_lifetime) {
  417. styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
  418. } else if (valid_char) {
  419. pos++;
  420. styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
  421. } else {
  422. styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
  423. }
  424. }
  /* Whether a comment being scanned is known to be a doc comment.
   * UnknownComment is used when scanning a comment from its start;
   * the other two are used when resuming from an existing style. */
  enum CommentState {
      UnknownComment = 0,
      DocComment = 1,
      NotDocComment = 2
  };
  430. /*
  431. * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
  432. * Otherwise it's a regular comment.
  433. */
  434. static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
  435. int c = styler.SafeGetCharAt(pos, '\0');
  436. bool maybe_doc_comment = false;
  437. if (c == '*') {
  438. int n = styler.SafeGetCharAt(pos + 1, '\0');
  439. if (n != '*' && n != '/') {
  440. maybe_doc_comment = true;
  441. }
  442. } else if (c == '!') {
  443. maybe_doc_comment = true;
  444. }
  445. for (;;) {
  446. int n = styler.SafeGetCharAt(pos + 1, '\0');
  447. if (pos == styler.LineEnd(styler.GetLine(pos)))
  448. styler.SetLineState(styler.GetLine(pos), level);
  449. if (c == '*') {
  450. pos++;
  451. if (n == '/') {
  452. pos++;
  453. level--;
  454. if (level == 0) {
  455. styler.SetLineState(styler.GetLine(pos), 0);
  456. if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
  457. styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
  458. else
  459. styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
  460. break;
  461. }
  462. }
  463. } else if (c == '/') {
  464. pos++;
  465. if (n == '*') {
  466. pos++;
  467. level++;
  468. }
  469. }
  470. else {
  471. pos++;
  472. }
  473. if (pos >= max) {
  474. if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
  475. styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
  476. else
  477. styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
  478. break;
  479. }
  480. c = styler.SafeGetCharAt(pos, '\0');
  481. }
  482. }
  483. /*
  484. * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
  485. * Otherwise it's a normal line comment.
  486. */
  487. static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
  488. bool maybe_doc_comment = false;
  489. int c = styler.SafeGetCharAt(pos, '\0');
  490. if (c == '/') {
  491. if (pos < max) {
  492. pos++;
  493. c = styler.SafeGetCharAt(pos, '\0');
  494. if (c != '/') {
  495. maybe_doc_comment = true;
  496. }
  497. }
  498. } else if (c == '!') {
  499. maybe_doc_comment = true;
  500. }
  501. while (pos < max && c != '\n') {
  502. if (pos == styler.LineEnd(styler.GetLine(pos)))
  503. styler.SetLineState(styler.GetLine(pos), 0);
  504. pos++;
  505. c = styler.SafeGetCharAt(pos, '\0');
  506. }
  507. if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
  508. styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
  509. else
  510. styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
  511. }
  512. static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
  513. pos++;
  514. int c = styler.SafeGetCharAt(pos, '\0');
  515. pos++;
  516. if (c == '/')
  517. ResumeLineComment(styler, pos, max, UnknownComment);
  518. else if (c == '*')
  519. ResumeBlockComment(styler, pos, max, UnknownComment, 1);
  520. }
  521. static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
  522. int c = styler.SafeGetCharAt(pos, '\0');
  523. bool error = false;
  524. while (c != '"' && !error) {
  525. if (pos >= max) {
  526. error = true;
  527. break;
  528. }
  529. if (pos == styler.LineEnd(styler.GetLine(pos)))
  530. styler.SetLineState(styler.GetLine(pos), 0);
  531. if (c == '\\') {
  532. int n = styler.SafeGetCharAt(pos + 1, '\0');
  533. if (IsValidStringEscape(n)) {
  534. pos += 2;
  535. } else if (n == 'x') {
  536. pos += 2;
  537. error = !ScanNumericEscape(styler, pos, 2, true);
  538. } else if (n == 'u' && !ascii_only) {
  539. pos += 2;
  540. if (styler.SafeGetCharAt(pos, '\0') != '{') {
  541. // old-style
  542. error = !ScanNumericEscape(styler, pos, 4, true);
  543. } else {
  544. int n_digits = 0;
  545. while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
  546. }
  547. if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
  548. pos++;
  549. else
  550. error = true;
  551. }
  552. } else if (n == 'U' && !ascii_only) {
  553. pos += 2;
  554. error = !ScanNumericEscape(styler, pos, 8, true);
  555. } else {
  556. pos += 1;
  557. error = true;
  558. }
  559. } else {
  560. if (ascii_only && !IsASCII((char)c))
  561. error = true;
  562. else
  563. pos++;
  564. }
  565. c = styler.SafeGetCharAt(pos, '\0');
  566. }
  567. if (!error)
  568. pos++;
  569. styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
  570. }
  571. static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
  572. for (;;) {
  573. if (pos == styler.LineEnd(styler.GetLine(pos)))
  574. styler.SetLineState(styler.GetLine(pos), num_hashes);
  575. int c = styler.SafeGetCharAt(pos, '\0');
  576. if (c == '"') {
  577. pos++;
  578. int trailing_num_hashes = 0;
  579. while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
  580. trailing_num_hashes++;
  581. pos++;
  582. }
  583. if (trailing_num_hashes == num_hashes) {
  584. styler.SetLineState(styler.GetLine(pos), 0);
  585. break;
  586. }
  587. } else if (pos >= max) {
  588. break;
  589. } else {
  590. if (ascii_only && !IsASCII((char)c))
  591. break;
  592. pos++;
  593. }
  594. }
  595. styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
  596. }
  597. static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
  598. pos++;
  599. int num_hashes = 0;
  600. while (styler.SafeGetCharAt(pos, '\0') == '#') {
  601. num_hashes++;
  602. pos++;
  603. }
  604. if (styler.SafeGetCharAt(pos, '\0') != '"') {
  605. styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
  606. } else {
  607. pos++;
  608. ResumeRawString(styler, pos, max, num_hashes, ascii_only);
  609. }
  610. }
  611. void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
  612. PropSetSimple props;
  613. Accessor styler(pAccess, &props);
  614. Sci_Position pos = startPos;
  615. Sci_Position max = pos + length;
  616. styler.StartAt(pos);
  617. styler.StartSegment(pos);
  618. if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
  619. ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
  620. } else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
  621. ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
  622. } else if (initStyle == SCE_RUST_STRING) {
  623. ResumeString(styler, pos, max, false);
  624. } else if (initStyle == SCE_RUST_BYTESTRING) {
  625. ResumeString(styler, pos, max, true);
  626. } else if (initStyle == SCE_RUST_STRINGR) {
  627. ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
  628. } else if (initStyle == SCE_RUST_BYTESTRINGR) {
  629. ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
  630. }
  631. while (pos < max) {
  632. int c = styler.SafeGetCharAt(pos, '\0');
  633. int n = styler.SafeGetCharAt(pos + 1, '\0');
  634. int n2 = styler.SafeGetCharAt(pos + 2, '\0');
  635. if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
  636. pos += 2;
  637. ResumeLineComment(styler, pos, max, NotDocComment);
  638. } else if (IsWhitespace(c)) {
  639. ScanWhitespace(styler, pos, max);
  640. } else if (c == '/' && (n == '/' || n == '*')) {
  641. ScanComments(styler, pos, max);
  642. } else if (c == 'r' && (n == '#' || n == '"')) {
  643. ScanRawString(styler, pos, max, false);
  644. } else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
  645. pos++;
  646. ScanRawString(styler, pos, max, true);
  647. } else if (c == 'b' && n == '"') {
  648. pos += 2;
  649. ResumeString(styler, pos, max, true);
  650. } else if (c == 'b' && n == '\'') {
  651. pos++;
  652. ScanCharacterLiteralOrLifetime(styler, pos, true);
  653. } else if (IsIdentifierStart(c)) {
  654. ScanIdentifier(styler, pos, keywords);
  655. } else if (IsADigit(c)) {
  656. ScanNumber(styler, pos);
  657. } else if (IsThreeCharOperator(c, n, n2)) {
  658. pos += 3;
  659. styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
  660. } else if (IsTwoCharOperator(c, n)) {
  661. pos += 2;
  662. styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
  663. } else if (IsOneCharOperator(c)) {
  664. pos++;
  665. styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
  666. } else if (c == '\'') {
  667. ScanCharacterLiteralOrLifetime(styler, pos, false);
  668. } else if (c == '"') {
  669. pos++;
  670. ResumeString(styler, pos, max, false);
  671. } else {
  672. pos++;
  673. styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
  674. }
  675. }
  676. styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
  677. styler.Flush();
  678. }
  679. void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
  680. if (!options.fold)
  681. return;
  682. LexAccessor styler(pAccess);
  683. Sci_PositionU endPos = startPos + length;
  684. int visibleChars = 0;
  685. bool inLineComment = false;
  686. Sci_Position lineCurrent = styler.GetLine(startPos);
  687. int levelCurrent = SC_FOLDLEVELBASE;
  688. if (lineCurrent > 0)
  689. levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
  690. Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
  691. int levelMinCurrent = levelCurrent;
  692. int levelNext = levelCurrent;
  693. char chNext = styler[startPos];
  694. int styleNext = styler.StyleAt(startPos);
  695. int style = initStyle;
  696. const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
  697. for (Sci_PositionU i = startPos; i < endPos; i++) {
  698. char ch = chNext;
  699. chNext = styler.SafeGetCharAt(i + 1);
  700. int stylePrev = style;
  701. style = styleNext;
  702. styleNext = styler.StyleAt(i + 1);
  703. bool atEOL = i == (lineStartNext-1);
  704. if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
  705. inLineComment = true;
  706. if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
  707. if (!IsStreamCommentStyle(stylePrev)) {
  708. levelNext++;
  709. } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
  710. // Comments don't end at end of line and the next character may be unstyled.
  711. levelNext--;
  712. }
  713. }
  714. if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
  715. if (userDefinedFoldMarkers) {
  716. if (styler.Match(i, options.foldExplicitStart.c_str())) {
  717. levelNext++;
  718. } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
  719. levelNext--;
  720. }
  721. } else {
  722. if ((ch == '/') && (chNext == '/')) {
  723. char chNext2 = styler.SafeGetCharAt(i + 2);
  724. if (chNext2 == '{') {
  725. levelNext++;
  726. } else if (chNext2 == '}') {
  727. levelNext--;
  728. }
  729. }
  730. }
  731. }
  732. if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
  733. if (ch == '{') {
  734. // Measure the minimum before a '{' to allow
  735. // folding on "} else {"
  736. if (levelMinCurrent > levelNext) {
  737. levelMinCurrent = levelNext;
  738. }
  739. levelNext++;
  740. } else if (ch == '}') {
  741. levelNext--;
  742. }
  743. }
  744. if (!IsASpace(ch))
  745. visibleChars++;
  746. if (atEOL || (i == endPos-1)) {
  747. int levelUse = levelCurrent;
  748. if (options.foldSyntaxBased && options.foldAtElse) {
  749. levelUse = levelMinCurrent;
  750. }
  751. int lev = levelUse | levelNext << 16;
  752. if (visibleChars == 0 && options.foldCompact)
  753. lev |= SC_FOLDLEVELWHITEFLAG;
  754. if (levelUse < levelNext)
  755. lev |= SC_FOLDLEVELHEADERFLAG;
  756. if (lev != styler.LevelAt(lineCurrent)) {
  757. styler.SetLevel(lineCurrent, lev);
  758. }
  759. lineCurrent++;
  760. lineStartNext = styler.LineStart(lineCurrent+1);
  761. levelCurrent = levelNext;
  762. levelMinCurrent = levelCurrent;
  763. if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
  764. // There is an empty line at end of file so give it same level and empty
  765. styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
  766. }
  767. visibleChars = 0;
  768. inLineComment = false;
  769. }
  770. }
  771. }
  772. LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);