LexBash.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882
  // Scintilla source code edit control
  /** @file LexBash.cxx
   ** Lexer for Bash.
   **/
  // Copyright 2004-2012 by Neil Hodgson <neilh@scintilla.org>
  // Adapted from LexPerl by Kein-Hong Man 2004
  // The License.txt file describes the conditions under which this software may be distributed.
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <stdio.h>
  11. #include <stdarg.h>
  12. #include <assert.h>
  13. #include "ILexer.h"
  14. #include "Scintilla.h"
  15. #include "SciLexer.h"
  16. #include "WordList.h"
  17. #include "LexAccessor.h"
  18. #include "Accessor.h"
  19. #include "StyleContext.h"
  20. #include "CharacterSet.h"
  21. #include "LexerModule.h"
  22. #ifdef SCI_NAMESPACE
  23. using namespace Scintilla;
  24. #endif
  // Size of the buffer that holds a here-document delimiter
  // (HereDocCls::Delimiter), including its terminating NUL.
  #define HERE_DELIM_MAX 256

  // define this if you want 'invalid octals' to be marked as errors
  // usually, this is not a good idea, permissive lexing is better
  #undef PEDANTIC_OCTAL

  // Markers stored in the lexer's "numBase" variable while a number is being
  // scanned. They all lie above 64, the largest value getBashNumberBase()
  // returns for a real base, so a marker can never be confused with one.
  #define BASH_BASE_ERROR 65
  #define BASH_BASE_DECIMAL 66
  #define BASH_BASE_HEX 67
  #ifdef PEDANTIC_OCTAL
  #define BASH_BASE_OCTAL 68
  #define BASH_BASE_OCTAL_ERROR 69
  #endif

  // state constants for parts of a bash command segment
  #define BASH_CMD_BODY 0
  #define BASH_CMD_START 1
  #define BASH_CMD_WORD 2
  #define BASH_CMD_TEST 3
  #define BASH_CMD_ARITH 4
  #define BASH_CMD_DELIM 5

  // state constants for nested delimiter pairs, used by
  // SCE_SH_STRING and SCE_SH_BACKTICKS processing
  #define BASH_DELIM_LITERAL 0
  #define BASH_DELIM_STRING 1
  #define BASH_DELIM_CSTRING 2
  #define BASH_DELIM_LSTRING 3
  #define BASH_DELIM_COMMAND 4
  #define BASH_DELIM_BACKTICK 5

  // Capacity of the save stacks in QuoteStackCls (maximum nesting depth).
  #define BASH_DELIM_STACK_MAX 7
  52. static inline int translateBashDigit(int ch) {
  53. if (ch >= '0' && ch <= '9') {
  54. return ch - '0';
  55. } else if (ch >= 'a' && ch <= 'z') {
  56. return ch - 'a' + 10;
  57. } else if (ch >= 'A' && ch <= 'Z') {
  58. return ch - 'A' + 36;
  59. } else if (ch == '@') {
  60. return 62;
  61. } else if (ch == '_') {
  62. return 63;
  63. }
  64. return BASH_BASE_ERROR;
  65. }
  66. static inline int getBashNumberBase(char *s) {
  67. int i = 0;
  68. int base = 0;
  69. while (*s) {
  70. base = base * 10 + (*s++ - '0');
  71. i++;
  72. }
  73. if (base > 64 || i > 2) {
  74. return BASH_BASE_ERROR;
  75. }
  76. return base;
  77. }
  /**
   * Return the closing delimiter that pairs with an opening one.
   *
   * @param ch an opening delimiter character
   * @return ')' for '(', ']' for '[', '}' for '{', '>' for '<'; any other
   *         character is returned unchanged (it closes itself, e.g. quotes)
   */
  static int opposite(int ch) {
      switch (ch) {
      case '(':
          return ')';
      case '[':
          return ']';
      case '{':
          return '}';
      case '<':
          return '>';
      default:
          return ch;
      }
  }
  85. static int GlobScan(StyleContext &sc) {
  86. // forward scan for a glob-like (...), no whitespace allowed
  87. int c, sLen = 0;
  88. while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
  89. if (IsASpace(c)) {
  90. return 0;
  91. } else if (c == ')') {
  92. return sLen;
  93. }
  94. }
  95. return 0;
  96. }
  97. static void ColouriseBashDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
  98. WordList *keywordlists[], Accessor &styler) {
  99. WordList &keywords = *keywordlists[0];
  100. WordList cmdDelimiter, bashStruct, bashStruct_in;
  101. cmdDelimiter.Set("| || |& & && ; ;; ( ) { }");
  102. bashStruct.Set("if elif fi while until else then do done esac eval");
  103. bashStruct_in.Set("for case select");
  104. CharacterSet setWordStart(CharacterSet::setAlpha, "_");
  105. // note that [+-] are often parts of identifiers in shell scripts
  106. CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
  107. CharacterSet setMetaCharacter(CharacterSet::setNone, "|&;()<> \t\r\n");
  108. setMetaCharacter.Add(0);
  109. CharacterSet setBashOperator(CharacterSet::setNone, "^&%()-+=|{}[]:;>,*/<?!.~@");
  110. CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
  111. CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
  112. CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!%*,./:?@[]^`{}~");
  113. CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!%*,./:=?@[]^`{}~");
  114. CharacterSet setLeftShift(CharacterSet::setDigits, "$");
  115. class HereDocCls { // Class to manage HERE document elements
  116. public:
  117. int State; // 0: '<<' encountered
  118. // 1: collect the delimiter
  119. // 2: here doc text (lines after the delimiter)
  120. int Quote; // the char after '<<'
  121. bool Quoted; // true if Quote in ('\'','"','`')
  122. bool Indent; // indented delimiter (for <<-)
  123. int DelimiterLength; // strlen(Delimiter)
  124. char Delimiter[HERE_DELIM_MAX]; // the Delimiter
  125. HereDocCls() {
  126. State = 0;
  127. Quote = 0;
  128. Quoted = false;
  129. Indent = 0;
  130. DelimiterLength = 0;
  131. Delimiter[0] = '\0';
  132. }
  133. void Append(int ch) {
  134. Delimiter[DelimiterLength++] = static_cast<char>(ch);
  135. Delimiter[DelimiterLength] = '\0';
  136. }
  137. ~HereDocCls() {
  138. }
  139. };
  140. HereDocCls HereDoc;
  141. class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
  142. public:
  143. int Count;
  144. int Up, Down;
  145. QuoteCls() {
  146. Count = 0;
  147. Up = '\0';
  148. Down = '\0';
  149. }
  150. void Open(int u) {
  151. Count++;
  152. Up = u;
  153. Down = opposite(Up);
  154. }
  155. void Start(int u) {
  156. Count = 0;
  157. Open(u);
  158. }
  159. };
  160. QuoteCls Quote;
  161. class QuoteStackCls { // Class to manage quote pairs that nest
  162. public:
  163. int Count;
  164. int Up, Down;
  165. int Style;
  166. int Depth; // levels pushed
  167. int CountStack[BASH_DELIM_STACK_MAX];
  168. int UpStack [BASH_DELIM_STACK_MAX];
  169. int StyleStack[BASH_DELIM_STACK_MAX];
  170. QuoteStackCls() {
  171. Count = 0;
  172. Up = '\0';
  173. Down = '\0';
  174. Style = 0;
  175. Depth = 0;
  176. }
  177. void Start(int u, int s) {
  178. Count = 1;
  179. Up = u;
  180. Down = opposite(Up);
  181. Style = s;
  182. }
  183. void Push(int u, int s) {
  184. if (Depth >= BASH_DELIM_STACK_MAX)
  185. return;
  186. CountStack[Depth] = Count;
  187. UpStack [Depth] = Up;
  188. StyleStack[Depth] = Style;
  189. Depth++;
  190. Count = 1;
  191. Up = u;
  192. Down = opposite(Up);
  193. Style = s;
  194. }
  195. void Pop(void) {
  196. if (Depth <= 0)
  197. return;
  198. Depth--;
  199. Count = CountStack[Depth];
  200. Up = UpStack [Depth];
  201. Style = StyleStack[Depth];
  202. Down = opposite(Up);
  203. }
  204. ~QuoteStackCls() {
  205. }
  206. };
  207. QuoteStackCls QuoteStack;
  208. int numBase = 0;
  209. int digit;
  210. Sci_PositionU endPos = startPos + length;
  211. int cmdState = BASH_CMD_START;
  212. int testExprType = 0;
  213. // Always backtracks to the start of a line that is not a continuation
  214. // of the previous line (i.e. start of a bash command segment)
  215. Sci_Position ln = styler.GetLine(startPos);
  216. if (ln > 0 && startPos == static_cast<Sci_PositionU>(styler.LineStart(ln)))
  217. ln--;
  218. for (;;) {
  219. startPos = styler.LineStart(ln);
  220. if (ln == 0 || styler.GetLineState(ln) == BASH_CMD_START)
  221. break;
  222. ln--;
  223. }
  224. initStyle = SCE_SH_DEFAULT;
  225. StyleContext sc(startPos, endPos - startPos, initStyle, styler);
  226. for (; sc.More(); sc.Forward()) {
  227. // handle line continuation, updates per-line stored state
  228. if (sc.atLineStart) {
  229. ln = styler.GetLine(sc.currentPos);
  230. if (sc.state == SCE_SH_STRING
  231. || sc.state == SCE_SH_BACKTICKS
  232. || sc.state == SCE_SH_CHARACTER
  233. || sc.state == SCE_SH_HERE_Q
  234. || sc.state == SCE_SH_COMMENTLINE
  235. || sc.state == SCE_SH_PARAM) {
  236. // force backtrack while retaining cmdState
  237. styler.SetLineState(ln, BASH_CMD_BODY);
  238. } else {
  239. if (ln > 0) {
  240. if ((sc.GetRelative(-3) == '\\' && sc.GetRelative(-2) == '\r' && sc.chPrev == '\n')
  241. || sc.GetRelative(-2) == '\\') { // handle '\' line continuation
  242. // retain last line's state
  243. } else
  244. cmdState = BASH_CMD_START;
  245. }
  246. styler.SetLineState(ln, cmdState);
  247. }
  248. }
  249. // controls change of cmdState at the end of a non-whitespace element
  250. // states BODY|TEST|ARITH persist until the end of a command segment
  251. // state WORD persist, but ends with 'in' or 'do' construct keywords
  252. int cmdStateNew = BASH_CMD_BODY;
  253. if (cmdState == BASH_CMD_TEST || cmdState == BASH_CMD_ARITH || cmdState == BASH_CMD_WORD)
  254. cmdStateNew = cmdState;
  255. int stylePrev = sc.state;
  256. // Determine if the current state should terminate.
  257. switch (sc.state) {
  258. case SCE_SH_OPERATOR:
  259. sc.SetState(SCE_SH_DEFAULT);
  260. if (cmdState == BASH_CMD_DELIM) // if command delimiter, start new command
  261. cmdStateNew = BASH_CMD_START;
  262. else if (sc.chPrev == '\\') // propagate command state if line continued
  263. cmdStateNew = cmdState;
  264. break;
  265. case SCE_SH_WORD:
  266. // "." never used in Bash variable names but used in file names
  267. if (!setWord.Contains(sc.ch)) {
  268. char s[500];
  269. char s2[10];
  270. sc.GetCurrent(s, sizeof(s));
  271. // allow keywords ending in a whitespace or command delimiter
  272. s2[0] = static_cast<char>(sc.ch);
  273. s2[1] = '\0';
  274. bool keywordEnds = IsASpace(sc.ch) || cmdDelimiter.InList(s2);
  275. // 'in' or 'do' may be construct keywords
  276. if (cmdState == BASH_CMD_WORD) {
  277. if (strcmp(s, "in") == 0 && keywordEnds)
  278. cmdStateNew = BASH_CMD_BODY;
  279. else if (strcmp(s, "do") == 0 && keywordEnds)
  280. cmdStateNew = BASH_CMD_START;
  281. else
  282. sc.ChangeState(SCE_SH_IDENTIFIER);
  283. sc.SetState(SCE_SH_DEFAULT);
  284. break;
  285. }
  286. // a 'test' keyword starts a test expression
  287. if (strcmp(s, "test") == 0) {
  288. if (cmdState == BASH_CMD_START && keywordEnds) {
  289. cmdStateNew = BASH_CMD_TEST;
  290. testExprType = 0;
  291. } else
  292. sc.ChangeState(SCE_SH_IDENTIFIER);
  293. }
  294. // detect bash construct keywords
  295. else if (bashStruct.InList(s)) {
  296. if (cmdState == BASH_CMD_START && keywordEnds)
  297. cmdStateNew = BASH_CMD_START;
  298. else
  299. sc.ChangeState(SCE_SH_IDENTIFIER);
  300. }
  301. // 'for'|'case'|'select' needs 'in'|'do' to be highlighted later
  302. else if (bashStruct_in.InList(s)) {
  303. if (cmdState == BASH_CMD_START && keywordEnds)
  304. cmdStateNew = BASH_CMD_WORD;
  305. else
  306. sc.ChangeState(SCE_SH_IDENTIFIER);
  307. }
  308. // disambiguate option items and file test operators
  309. else if (s[0] == '-') {
  310. if (cmdState != BASH_CMD_TEST)
  311. sc.ChangeState(SCE_SH_IDENTIFIER);
  312. }
  313. // disambiguate keywords and identifiers
  314. else if (cmdState != BASH_CMD_START
  315. || !(keywords.InList(s) && keywordEnds)) {
  316. sc.ChangeState(SCE_SH_IDENTIFIER);
  317. }
  318. sc.SetState(SCE_SH_DEFAULT);
  319. }
  320. break;
  321. case SCE_SH_IDENTIFIER:
  322. if (sc.chPrev == '\\') { // for escaped chars
  323. sc.ForwardSetState(SCE_SH_DEFAULT);
  324. } else if (!setWord.Contains(sc.ch)) {
  325. sc.SetState(SCE_SH_DEFAULT);
  326. } else if (cmdState == BASH_CMD_ARITH && !setWordStart.Contains(sc.ch)) {
  327. sc.SetState(SCE_SH_DEFAULT);
  328. }
  329. break;
  330. case SCE_SH_NUMBER:
  331. digit = translateBashDigit(sc.ch);
  332. if (numBase == BASH_BASE_DECIMAL) {
  333. if (sc.ch == '#') {
  334. char s[10];
  335. sc.GetCurrent(s, sizeof(s));
  336. numBase = getBashNumberBase(s);
  337. if (numBase != BASH_BASE_ERROR)
  338. break;
  339. } else if (IsADigit(sc.ch))
  340. break;
  341. } else if (numBase == BASH_BASE_HEX) {
  342. if (IsADigit(sc.ch, 16))
  343. break;
  344. #ifdef PEDANTIC_OCTAL
  345. } else if (numBase == BASH_BASE_OCTAL ||
  346. numBase == BASH_BASE_OCTAL_ERROR) {
  347. if (digit <= 7)
  348. break;
  349. if (digit <= 9) {
  350. numBase = BASH_BASE_OCTAL_ERROR;
  351. break;
  352. }
  353. #endif
  354. } else if (numBase == BASH_BASE_ERROR) {
  355. if (digit <= 9)
  356. break;
  357. } else { // DD#DDDD number style handling
  358. if (digit != BASH_BASE_ERROR) {
  359. if (numBase <= 36) {
  360. // case-insensitive if base<=36
  361. if (digit >= 36) digit -= 26;
  362. }
  363. if (digit < numBase)
  364. break;
  365. if (digit <= 9) {
  366. numBase = BASH_BASE_ERROR;
  367. break;
  368. }
  369. }
  370. }
  371. // fallthrough when number is at an end or error
  372. if (numBase == BASH_BASE_ERROR
  373. #ifdef PEDANTIC_OCTAL
  374. || numBase == BASH_BASE_OCTAL_ERROR
  375. #endif
  376. ) {
  377. sc.ChangeState(SCE_SH_ERROR);
  378. }
  379. sc.SetState(SCE_SH_DEFAULT);
  380. break;
  381. case SCE_SH_COMMENTLINE:
  382. if (sc.atLineEnd && sc.chPrev != '\\') {
  383. sc.SetState(SCE_SH_DEFAULT);
  384. }
  385. break;
  386. case SCE_SH_HERE_DELIM:
  387. // From Bash info:
  388. // ---------------
  389. // Specifier format is: <<[-]WORD
  390. // Optional '-' is for removal of leading tabs from here-doc.
  391. // Whitespace acceptable after <<[-] operator
  392. //
  393. if (HereDoc.State == 0) { // '<<' encountered
  394. HereDoc.Quote = sc.chNext;
  395. HereDoc.Quoted = false;
  396. HereDoc.DelimiterLength = 0;
  397. HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
  398. if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
  399. sc.Forward();
  400. HereDoc.Quoted = true;
  401. HereDoc.State = 1;
  402. } else if (setHereDoc.Contains(sc.chNext) ||
  403. (sc.chNext == '=' && cmdState != BASH_CMD_ARITH)) {
  404. // an unquoted here-doc delimiter, no special handling
  405. HereDoc.State = 1;
  406. } else if (sc.chNext == '<') { // HERE string <<<
  407. sc.Forward();
  408. sc.ForwardSetState(SCE_SH_DEFAULT);
  409. } else if (IsASpace(sc.chNext)) {
  410. // eat whitespace
  411. } else if (setLeftShift.Contains(sc.chNext) ||
  412. (sc.chNext == '=' && cmdState == BASH_CMD_ARITH)) {
  413. // left shift <<$var or <<= cases
  414. sc.ChangeState(SCE_SH_OPERATOR);
  415. sc.ForwardSetState(SCE_SH_DEFAULT);
  416. } else {
  417. // symbols terminates; deprecated zero-length delimiter
  418. HereDoc.State = 1;
  419. }
  420. } else if (HereDoc.State == 1) { // collect the delimiter
  421. // * if single quoted, there's no escape
  422. // * if double quoted, there are \\ and \" escapes
  423. if ((HereDoc.Quote == '\'' && sc.ch != HereDoc.Quote) ||
  424. (HereDoc.Quoted && sc.ch != HereDoc.Quote && sc.ch != '\\') ||
  425. (HereDoc.Quote != '\'' && sc.chPrev == '\\') ||
  426. (setHereDoc2.Contains(sc.ch))) {
  427. HereDoc.Append(sc.ch);
  428. } else if (HereDoc.Quoted && sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
  429. sc.ForwardSetState(SCE_SH_DEFAULT);
  430. } else if (sc.ch == '\\') {
  431. if (HereDoc.Quoted && sc.chNext != HereDoc.Quote && sc.chNext != '\\') {
  432. // in quoted prefixes only \ and the quote eat the escape
  433. HereDoc.Append(sc.ch);
  434. } else {
  435. // skip escape prefix
  436. }
  437. } else if (!HereDoc.Quoted) {
  438. sc.SetState(SCE_SH_DEFAULT);
  439. }
  440. if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
  441. sc.SetState(SCE_SH_ERROR);
  442. HereDoc.State = 0;
  443. }
  444. }
  445. break;
  446. case SCE_SH_HERE_Q:
  447. // HereDoc.State == 2
  448. if (sc.atLineStart) {
  449. sc.SetState(SCE_SH_HERE_Q);
  450. int prefixws = 0;
  451. while (sc.ch == '\t' && !sc.atLineEnd) { // tabulation prefix
  452. sc.Forward();
  453. prefixws++;
  454. }
  455. if (prefixws > 0)
  456. sc.SetState(SCE_SH_HERE_Q);
  457. while (!sc.atLineEnd) {
  458. sc.Forward();
  459. }
  460. char s[HERE_DELIM_MAX];
  461. sc.GetCurrent(s, sizeof(s));
  462. if (sc.LengthCurrent() == 0) { // '' or "" delimiters
  463. if ((prefixws == 0 || HereDoc.Indent) &&
  464. HereDoc.Quoted && HereDoc.DelimiterLength == 0)
  465. sc.SetState(SCE_SH_DEFAULT);
  466. break;
  467. }
  468. if (s[strlen(s) - 1] == '\r')
  469. s[strlen(s) - 1] = '\0';
  470. if (strcmp(HereDoc.Delimiter, s) == 0) {
  471. if ((prefixws == 0) || // indentation rule
  472. (prefixws > 0 && HereDoc.Indent)) {
  473. sc.SetState(SCE_SH_DEFAULT);
  474. break;
  475. }
  476. }
  477. }
  478. break;
  479. case SCE_SH_SCALAR: // variable names
  480. if (!setParam.Contains(sc.ch)) {
  481. if (sc.LengthCurrent() == 1) {
  482. // Special variable: $(, $_ etc.
  483. sc.ForwardSetState(SCE_SH_DEFAULT);
  484. } else {
  485. sc.SetState(SCE_SH_DEFAULT);
  486. }
  487. }
  488. break;
  489. case SCE_SH_STRING: // delimited styles, can nest
  490. case SCE_SH_BACKTICKS:
  491. if (sc.ch == '\\' && QuoteStack.Up != '\\') {
  492. if (QuoteStack.Style != BASH_DELIM_LITERAL)
  493. sc.Forward();
  494. } else if (sc.ch == QuoteStack.Down) {
  495. QuoteStack.Count--;
  496. if (QuoteStack.Count == 0) {
  497. if (QuoteStack.Depth > 0) {
  498. QuoteStack.Pop();
  499. } else
  500. sc.ForwardSetState(SCE_SH_DEFAULT);
  501. }
  502. } else if (sc.ch == QuoteStack.Up) {
  503. QuoteStack.Count++;
  504. } else {
  505. if (QuoteStack.Style == BASH_DELIM_STRING ||
  506. QuoteStack.Style == BASH_DELIM_LSTRING
  507. ) { // do nesting for "string", $"locale-string"
  508. if (sc.ch == '`') {
  509. QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
  510. } else if (sc.ch == '$' && sc.chNext == '(') {
  511. sc.Forward();
  512. QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
  513. }
  514. } else if (QuoteStack.Style == BASH_DELIM_COMMAND ||
  515. QuoteStack.Style == BASH_DELIM_BACKTICK
  516. ) { // do nesting for $(command), `command`
  517. if (sc.ch == '\'') {
  518. QuoteStack.Push(sc.ch, BASH_DELIM_LITERAL);
  519. } else if (sc.ch == '\"') {
  520. QuoteStack.Push(sc.ch, BASH_DELIM_STRING);
  521. } else if (sc.ch == '`') {
  522. QuoteStack.Push(sc.ch, BASH_DELIM_BACKTICK);
  523. } else if (sc.ch == '$') {
  524. if (sc.chNext == '\'') {
  525. sc.Forward();
  526. QuoteStack.Push(sc.ch, BASH_DELIM_CSTRING);
  527. } else if (sc.chNext == '\"') {
  528. sc.Forward();
  529. QuoteStack.Push(sc.ch, BASH_DELIM_LSTRING);
  530. } else if (sc.chNext == '(') {
  531. sc.Forward();
  532. QuoteStack.Push(sc.ch, BASH_DELIM_COMMAND);
  533. }
  534. }
  535. }
  536. }
  537. break;
  538. case SCE_SH_PARAM: // ${parameter}
  539. if (sc.ch == '\\' && Quote.Up != '\\') {
  540. sc.Forward();
  541. } else if (sc.ch == Quote.Down) {
  542. Quote.Count--;
  543. if (Quote.Count == 0) {
  544. sc.ForwardSetState(SCE_SH_DEFAULT);
  545. }
  546. } else if (sc.ch == Quote.Up) {
  547. Quote.Count++;
  548. }
  549. break;
  550. case SCE_SH_CHARACTER: // singly-quoted strings
  551. if (sc.ch == Quote.Down) {
  552. Quote.Count--;
  553. if (Quote.Count == 0) {
  554. sc.ForwardSetState(SCE_SH_DEFAULT);
  555. }
  556. }
  557. break;
  558. }
  559. // Must check end of HereDoc state 1 before default state is handled
  560. if (HereDoc.State == 1 && sc.atLineEnd) {
  561. // Begin of here-doc (the line after the here-doc delimiter):
  562. // Lexically, the here-doc starts from the next line after the >>, but the
  563. // first line of here-doc seem to follow the style of the last EOL sequence
  564. HereDoc.State = 2;
  565. if (HereDoc.Quoted) {
  566. if (sc.state == SCE_SH_HERE_DELIM) {
  567. // Missing quote at end of string! Syntax error in bash 4.3
  568. // Mark this bit as an error, do not colour any here-doc
  569. sc.ChangeState(SCE_SH_ERROR);
  570. sc.SetState(SCE_SH_DEFAULT);
  571. } else {
  572. // HereDoc.Quote always == '\''
  573. sc.SetState(SCE_SH_HERE_Q);
  574. }
  575. } else if (HereDoc.DelimiterLength == 0) {
  576. // no delimiter, illegal (but '' and "" are legal)
  577. sc.ChangeState(SCE_SH_ERROR);
  578. sc.SetState(SCE_SH_DEFAULT);
  579. } else {
  580. sc.SetState(SCE_SH_HERE_Q);
  581. }
  582. }
  583. // update cmdState about the current command segment
  584. if (stylePrev != SCE_SH_DEFAULT && sc.state == SCE_SH_DEFAULT) {
  585. cmdState = cmdStateNew;
  586. }
  587. // Determine if a new state should be entered.
  588. if (sc.state == SCE_SH_DEFAULT) {
  589. if (sc.ch == '\\') {
  590. // Bash can escape any non-newline as a literal
  591. sc.SetState(SCE_SH_IDENTIFIER);
  592. if (sc.chNext == '\r' || sc.chNext == '\n')
  593. sc.SetState(SCE_SH_OPERATOR);
  594. } else if (IsADigit(sc.ch)) {
  595. sc.SetState(SCE_SH_NUMBER);
  596. numBase = BASH_BASE_DECIMAL;
  597. if (sc.ch == '0') { // hex,octal
  598. if (sc.chNext == 'x' || sc.chNext == 'X') {
  599. numBase = BASH_BASE_HEX;
  600. sc.Forward();
  601. } else if (IsADigit(sc.chNext)) {
  602. #ifdef PEDANTIC_OCTAL
  603. numBase = BASH_BASE_OCTAL;
  604. #else
  605. numBase = BASH_BASE_HEX;
  606. #endif
  607. }
  608. }
  609. } else if (setWordStart.Contains(sc.ch)) {
  610. sc.SetState(SCE_SH_WORD);
  611. } else if (sc.ch == '#') {
  612. if (stylePrev != SCE_SH_WORD && stylePrev != SCE_SH_IDENTIFIER &&
  613. (sc.currentPos == 0 || setMetaCharacter.Contains(sc.chPrev))) {
  614. sc.SetState(SCE_SH_COMMENTLINE);
  615. } else {
  616. sc.SetState(SCE_SH_WORD);
  617. }
  618. // handle some zsh features within arithmetic expressions only
  619. if (cmdState == BASH_CMD_ARITH) {
  620. if (sc.chPrev == '[') { // [#8] [##8] output digit setting
  621. sc.SetState(SCE_SH_WORD);
  622. if (sc.chNext == '#') {
  623. sc.Forward();
  624. }
  625. } else if (sc.Match("##^") && IsUpperCase(sc.GetRelative(3))) { // ##^A
  626. sc.SetState(SCE_SH_IDENTIFIER);
  627. sc.Forward(3);
  628. } else if (sc.chNext == '#' && !IsASpace(sc.GetRelative(2))) { // ##a
  629. sc.SetState(SCE_SH_IDENTIFIER);
  630. sc.Forward(2);
  631. } else if (setWordStart.Contains(sc.chNext)) { // #name
  632. sc.SetState(SCE_SH_IDENTIFIER);
  633. }
  634. }
  635. } else if (sc.ch == '\"') {
  636. sc.SetState(SCE_SH_STRING);
  637. QuoteStack.Start(sc.ch, BASH_DELIM_STRING);
  638. } else if (sc.ch == '\'') {
  639. sc.SetState(SCE_SH_CHARACTER);
  640. Quote.Start(sc.ch);
  641. } else if (sc.ch == '`') {
  642. sc.SetState(SCE_SH_BACKTICKS);
  643. QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
  644. } else if (sc.ch == '$') {
  645. if (sc.Match("$((")) {
  646. sc.SetState(SCE_SH_OPERATOR); // handle '((' later
  647. continue;
  648. }
  649. sc.SetState(SCE_SH_SCALAR);
  650. sc.Forward();
  651. if (sc.ch == '{') {
  652. sc.ChangeState(SCE_SH_PARAM);
  653. Quote.Start(sc.ch);
  654. } else if (sc.ch == '\'') {
  655. sc.ChangeState(SCE_SH_STRING);
  656. QuoteStack.Start(sc.ch, BASH_DELIM_CSTRING);
  657. } else if (sc.ch == '"') {
  658. sc.ChangeState(SCE_SH_STRING);
  659. QuoteStack.Start(sc.ch, BASH_DELIM_LSTRING);
  660. } else if (sc.ch == '(') {
  661. sc.ChangeState(SCE_SH_BACKTICKS);
  662. QuoteStack.Start(sc.ch, BASH_DELIM_COMMAND);
  663. } else if (sc.ch == '`') { // $` seen in a configure script, valid?
  664. sc.ChangeState(SCE_SH_BACKTICKS);
  665. QuoteStack.Start(sc.ch, BASH_DELIM_BACKTICK);
  666. } else {
  667. continue; // scalar has no delimiter pair
  668. }
  669. } else if (sc.Match('<', '<')) {
  670. sc.SetState(SCE_SH_HERE_DELIM);
  671. HereDoc.State = 0;
  672. if (sc.GetRelative(2) == '-') { // <<- indent case
  673. HereDoc.Indent = true;
  674. sc.Forward();
  675. } else {
  676. HereDoc.Indent = false;
  677. }
  678. } else if (sc.ch == '-' && // one-char file test operators
  679. setSingleCharOp.Contains(sc.chNext) &&
  680. !setWord.Contains(sc.GetRelative(2)) &&
  681. IsASpace(sc.chPrev)) {
  682. sc.SetState(SCE_SH_WORD);
  683. sc.Forward();
  684. } else if (setBashOperator.Contains(sc.ch)) {
  685. char s[10];
  686. bool isCmdDelim = false;
  687. sc.SetState(SCE_SH_OPERATOR);
  688. // globs have no whitespace, do not appear in arithmetic expressions
  689. if (cmdState != BASH_CMD_ARITH && sc.ch == '(' && sc.chNext != '(') {
  690. int i = GlobScan(sc);
  691. if (i > 1) {
  692. sc.SetState(SCE_SH_IDENTIFIER);
  693. sc.Forward(i);
  694. continue;
  695. }
  696. }
  697. // handle opening delimiters for test/arithmetic expressions - ((,[[,[
  698. if (cmdState == BASH_CMD_START
  699. || cmdState == BASH_CMD_BODY) {
  700. if (sc.Match('(', '(')) {
  701. cmdState = BASH_CMD_ARITH;
  702. sc.Forward();
  703. } else if (sc.Match('[', '[') && IsASpace(sc.GetRelative(2))) {
  704. cmdState = BASH_CMD_TEST;
  705. testExprType = 1;
  706. sc.Forward();
  707. } else if (sc.ch == '[' && IsASpace(sc.chNext)) {
  708. cmdState = BASH_CMD_TEST;
  709. testExprType = 2;
  710. }
  711. }
  712. // special state -- for ((x;y;z)) in ... looping
  713. if (cmdState == BASH_CMD_WORD && sc.Match('(', '(')) {
  714. cmdState = BASH_CMD_ARITH;
  715. sc.Forward();
  716. continue;
  717. }
  718. // handle command delimiters in command START|BODY|WORD state, also TEST if 'test'
  719. if (cmdState == BASH_CMD_START
  720. || cmdState == BASH_CMD_BODY
  721. || cmdState == BASH_CMD_WORD
  722. || (cmdState == BASH_CMD_TEST && testExprType == 0)) {
  723. s[0] = static_cast<char>(sc.ch);
  724. if (setBashOperator.Contains(sc.chNext)) {
  725. s[1] = static_cast<char>(sc.chNext);
  726. s[2] = '\0';
  727. isCmdDelim = cmdDelimiter.InList(s);
  728. if (isCmdDelim)
  729. sc.Forward();
  730. }
  731. if (!isCmdDelim) {
  732. s[1] = '\0';
  733. isCmdDelim = cmdDelimiter.InList(s);
  734. }
  735. if (isCmdDelim) {
  736. cmdState = BASH_CMD_DELIM;
  737. continue;
  738. }
  739. }
  740. // handle closing delimiters for test/arithmetic expressions - )),]],]
  741. if (cmdState == BASH_CMD_ARITH && sc.Match(')', ')')) {
  742. cmdState = BASH_CMD_BODY;
  743. sc.Forward();
  744. } else if (cmdState == BASH_CMD_TEST && IsASpace(sc.chPrev)) {
  745. if (sc.Match(']', ']') && testExprType == 1) {
  746. sc.Forward();
  747. cmdState = BASH_CMD_BODY;
  748. } else if (sc.ch == ']' && testExprType == 2) {
  749. cmdState = BASH_CMD_BODY;
  750. }
  751. }
  752. }
  753. }// sc.state
  754. }
  755. sc.Complete();
  756. if (sc.state == SCE_SH_HERE_Q) {
  757. styler.ChangeLexerState(sc.currentPos, styler.Length());
  758. }
  759. sc.Complete();
  760. }
  761. static bool IsCommentLine(Sci_Position line, Accessor &styler) {
  762. Sci_Position pos = styler.LineStart(line);
  763. Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
  764. for (Sci_Position i = pos; i < eol_pos; i++) {
  765. char ch = styler[i];
  766. if (ch == '#')
  767. return true;
  768. else if (ch != ' ' && ch != '\t')
  769. return false;
  770. }
  771. return false;
  772. }
  773. static void FoldBashDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
  774. Accessor &styler) {
  775. bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
  776. bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  777. Sci_PositionU endPos = startPos + length;
  778. int visibleChars = 0;
  779. int skipHereCh = 0;
  780. Sci_Position lineCurrent = styler.GetLine(startPos);
  781. int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  782. int levelCurrent = levelPrev;
  783. char chNext = styler[startPos];
  784. int styleNext = styler.StyleAt(startPos);
  785. for (Sci_PositionU i = startPos; i < endPos; i++) {
  786. char ch = chNext;
  787. chNext = styler.SafeGetCharAt(i + 1);
  788. int style = styleNext;
  789. styleNext = styler.StyleAt(i + 1);
  790. bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
  791. // Comment folding
  792. if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
  793. {
  794. if (!IsCommentLine(lineCurrent - 1, styler)
  795. && IsCommentLine(lineCurrent + 1, styler))
  796. levelCurrent++;
  797. else if (IsCommentLine(lineCurrent - 1, styler)
  798. && !IsCommentLine(lineCurrent + 1, styler))
  799. levelCurrent--;
  800. }
  801. if (style == SCE_SH_OPERATOR) {
  802. if (ch == '{') {
  803. levelCurrent++;
  804. } else if (ch == '}') {
  805. levelCurrent--;
  806. }
  807. }
  808. // Here Document folding
  809. if (style == SCE_SH_HERE_DELIM) {
  810. if (ch == '<' && chNext == '<') {
  811. if (styler.SafeGetCharAt(i + 2) == '<') {
  812. skipHereCh = 1;
  813. } else {
  814. if (skipHereCh == 0) {
  815. levelCurrent++;
  816. } else {
  817. skipHereCh = 0;
  818. }
  819. }
  820. }
  821. } else if (style == SCE_SH_HERE_Q && styler.StyleAt(i+1) == SCE_SH_DEFAULT) {
  822. levelCurrent--;
  823. }
  824. if (atEOL) {
  825. int lev = levelPrev;
  826. if (visibleChars == 0 && foldCompact)
  827. lev |= SC_FOLDLEVELWHITEFLAG;
  828. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  829. lev |= SC_FOLDLEVELHEADERFLAG;
  830. if (lev != styler.LevelAt(lineCurrent)) {
  831. styler.SetLevel(lineCurrent, lev);
  832. }
  833. lineCurrent++;
  834. levelPrev = levelCurrent;
  835. visibleChars = 0;
  836. }
  837. if (!isspacechar(ch))
  838. visibleChars++;
  839. }
  840. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  841. int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
  842. styler.SetLevel(lineCurrent, levelPrev | flagsNext);
  843. }
  // Descriptions of the lexer's keyword lists, terminated by a null entry.
  // Entry 0 describes keywordlists[0] as used by ColouriseBashDoc.
  static const char * const bashWordListDesc[] = {
      "Keywords",
      0
  };
  848. LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);