LexCOBOL.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. // Scintilla source code edit control
  2. /** @file LexCOBOL.cxx
  3. ** Lexer for COBOL
  4. ** Based on LexPascal.cxx
  5. ** Written by Laurent le Tynevez
  6. ** Updated by Simon Steele <s.steele@pnotepad.org> September 2002
  7. ** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments)
  8. ** Updated by Rod Falck, Aug 2006 Converted to COBOL
  9. **/
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <stdio.h>
  13. #include <stdarg.h>
  14. #include <assert.h>
  15. #include <ctype.h>
  16. #include "ILexer.h"
  17. #include "Scintilla.h"
  18. #include "SciLexer.h"
  19. #include "WordList.h"
  20. #include "LexAccessor.h"
  21. #include "Accessor.h"
  22. #include "StyleContext.h"
  23. #include "CharacterSet.h"
  24. #include "LexerModule.h"
  25. #ifdef SCI_NAMESPACE
  26. using namespace Scintilla;
  27. #endif
  28. #define IN_DIVISION 0x01
  29. #define IN_DECLARATIVES 0x02
  30. #define IN_SECTION 0x04
  31. #define IN_PARAGRAPH 0x08
  32. #define IN_FLAGS 0xF
  33. #define NOT_HEADER 0x10
  34. inline bool isCOBOLoperator(char ch)
  35. {
  36. return isoperator(ch);
  37. }
  38. inline bool isCOBOLwordchar(char ch)
  39. {
  40. return IsASCII(ch) && (isalnum(ch) || ch == '-');
  41. }
  42. inline bool isCOBOLwordstart(char ch)
  43. {
  44. return IsASCII(ch) && isalnum(ch);
  45. }
  46. static int CountBits(int nBits)
  47. {
  48. int count = 0;
  49. for (int i = 0; i < 32; ++i)
  50. {
  51. count += nBits & 1;
  52. nBits >>= 1;
  53. }
  54. return count;
  55. }
  56. static void getRange(Sci_PositionU start,
  57. Sci_PositionU end,
  58. Accessor &styler,
  59. char *s,
  60. Sci_PositionU len) {
  61. Sci_PositionU i = 0;
  62. while ((i < end - start + 1) && (i < len-1)) {
  63. s[i] = static_cast<char>(tolower(styler[start + i]));
  64. i++;
  65. }
  66. s[i] = '\0';
  67. }
  68. static void ColourTo(Accessor &styler, Sci_PositionU end, unsigned int attr) {
  69. styler.ColourTo(end, attr);
  70. }
  71. static int classifyWordCOBOL(Sci_PositionU start, Sci_PositionU end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) {
  72. int ret = 0;
  73. WordList& a_keywords = *keywordlists[0];
  74. WordList& b_keywords = *keywordlists[1];
  75. WordList& c_keywords = *keywordlists[2];
  76. char s[100];
  77. s[0] = '\0';
  78. s[1] = '\0';
  79. getRange(start, end, styler, s, sizeof(s));
  80. char chAttr = SCE_C_IDENTIFIER;
  81. if (isdigit(s[0]) || (s[0] == '.') || (s[0] == 'v')) {
  82. chAttr = SCE_C_NUMBER;
  83. char *p = s + 1;
  84. while (*p) {
  85. if ((!isdigit(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) {
  86. chAttr = SCE_C_IDENTIFIER;
  87. break;
  88. }
  89. ++p;
  90. }
  91. }
  92. else {
  93. if (a_keywords.InList(s)) {
  94. chAttr = SCE_C_WORD;
  95. }
  96. else if (b_keywords.InList(s)) {
  97. chAttr = SCE_C_WORD2;
  98. }
  99. else if (c_keywords.InList(s)) {
  100. chAttr = SCE_C_UUID;
  101. }
  102. }
  103. if (*bAarea) {
  104. if (strcmp(s, "division") == 0) {
  105. ret = IN_DIVISION;
  106. // we've determined the containment, anything else is just ignored for those purposes
  107. *bAarea = false;
  108. } else if (strcmp(s, "declaratives") == 0) {
  109. ret = IN_DIVISION | IN_DECLARATIVES;
  110. if (nContainment & IN_DECLARATIVES)
  111. ret |= NOT_HEADER | IN_SECTION;
  112. // we've determined the containment, anything else is just ignored for those purposes
  113. *bAarea = false;
  114. } else if (strcmp(s, "section") == 0) {
  115. ret = (nContainment &~ IN_PARAGRAPH) | IN_SECTION;
  116. // we've determined the containment, anything else is just ignored for those purposes
  117. *bAarea = false;
  118. } else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) {
  119. ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER;
  120. } else {
  121. ret = nContainment | IN_PARAGRAPH;
  122. }
  123. }
  124. ColourTo(styler, end, chAttr);
  125. return ret;
  126. }
  127. static void ColouriseCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
  128. Accessor &styler) {
  129. styler.StartAt(startPos);
  130. int state = initStyle;
  131. if (state == SCE_C_CHARACTER) // Does not leak onto next line
  132. state = SCE_C_DEFAULT;
  133. char chPrev = ' ';
  134. char chNext = styler[startPos];
  135. Sci_PositionU lengthDoc = startPos + length;
  136. int nContainment;
  137. Sci_Position currentLine = styler.GetLine(startPos);
  138. if (currentLine > 0) {
  139. styler.SetLineState(currentLine, styler.GetLineState(currentLine-1));
  140. nContainment = styler.GetLineState(currentLine);
  141. nContainment &= ~NOT_HEADER;
  142. } else {
  143. styler.SetLineState(currentLine, 0);
  144. nContainment = 0;
  145. }
  146. styler.StartSegment(startPos);
  147. bool bNewLine = true;
  148. bool bAarea = !isspacechar(chNext);
  149. int column = 0;
  150. for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
  151. char ch = chNext;
  152. chNext = styler.SafeGetCharAt(i + 1);
  153. ++column;
  154. if (bNewLine) {
  155. column = 0;
  156. }
  157. if (column <= 1 && !bAarea) {
  158. bAarea = !isspacechar(ch);
  159. }
  160. bool bSetNewLine = false;
  161. if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
  162. // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
  163. // Avoid triggering two times on Dos/Win
  164. // End of line
  165. if (state == SCE_C_CHARACTER) {
  166. ColourTo(styler, i, state);
  167. state = SCE_C_DEFAULT;
  168. }
  169. styler.SetLineState(currentLine, nContainment);
  170. currentLine++;
  171. bSetNewLine = true;
  172. if (nContainment & NOT_HEADER)
  173. nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION);
  174. }
  175. if (styler.IsLeadByte(ch)) {
  176. chNext = styler.SafeGetCharAt(i + 2);
  177. chPrev = ' ';
  178. i += 1;
  179. continue;
  180. }
  181. if (state == SCE_C_DEFAULT) {
  182. if (isCOBOLwordstart(ch) || (ch == '$' && IsASCII(chNext) && isalpha(chNext))) {
  183. ColourTo(styler, i-1, state);
  184. state = SCE_C_IDENTIFIER;
  185. } else if (column == 6 && ch == '*') {
  186. // Cobol comment line: asterisk in column 7.
  187. ColourTo(styler, i-1, state);
  188. state = SCE_C_COMMENTLINE;
  189. } else if (ch == '*' && chNext == '>') {
  190. // Cobol inline comment: asterisk, followed by greater than.
  191. ColourTo(styler, i-1, state);
  192. state = SCE_C_COMMENTLINE;
  193. } else if (column == 0 && ch == '*' && chNext != '*') {
  194. ColourTo(styler, i-1, state);
  195. state = SCE_C_COMMENTLINE;
  196. } else if (column == 0 && ch == '/' && chNext != '*') {
  197. ColourTo(styler, i-1, state);
  198. state = SCE_C_COMMENTLINE;
  199. } else if (column == 0 && ch == '*' && chNext == '*') {
  200. ColourTo(styler, i-1, state);
  201. state = SCE_C_COMMENTDOC;
  202. } else if (column == 0 && ch == '/' && chNext == '*') {
  203. ColourTo(styler, i-1, state);
  204. state = SCE_C_COMMENTDOC;
  205. } else if (ch == '"') {
  206. ColourTo(styler, i-1, state);
  207. state = SCE_C_STRING;
  208. } else if (ch == '\'') {
  209. ColourTo(styler, i-1, state);
  210. state = SCE_C_CHARACTER;
  211. } else if (ch == '?' && column == 0) {
  212. ColourTo(styler, i-1, state);
  213. state = SCE_C_PREPROCESSOR;
  214. } else if (isCOBOLoperator(ch)) {
  215. ColourTo(styler, i-1, state);
  216. ColourTo(styler, i, SCE_C_OPERATOR);
  217. }
  218. } else if (state == SCE_C_IDENTIFIER) {
  219. if (!isCOBOLwordchar(ch)) {
  220. int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea);
  221. if(lStateChange != 0) {
  222. styler.SetLineState(currentLine, lStateChange);
  223. nContainment = lStateChange;
  224. }
  225. state = SCE_C_DEFAULT;
  226. chNext = styler.SafeGetCharAt(i + 1);
  227. if (ch == '"') {
  228. state = SCE_C_STRING;
  229. } else if (ch == '\'') {
  230. state = SCE_C_CHARACTER;
  231. } else if (isCOBOLoperator(ch)) {
  232. ColourTo(styler, i, SCE_C_OPERATOR);
  233. }
  234. }
  235. } else {
  236. if (state == SCE_C_PREPROCESSOR) {
  237. if ((ch == '\r' || ch == '\n') && !(chPrev == '\\' || chPrev == '\r')) {
  238. ColourTo(styler, i-1, state);
  239. state = SCE_C_DEFAULT;
  240. }
  241. } else if (state == SCE_C_COMMENT) {
  242. if (ch == '\r' || ch == '\n') {
  243. ColourTo(styler, i, state);
  244. state = SCE_C_DEFAULT;
  245. }
  246. } else if (state == SCE_C_COMMENTDOC) {
  247. if (ch == '\r' || ch == '\n') {
  248. if (((i > styler.GetStartSegment() + 2) || (
  249. (initStyle == SCE_C_COMMENTDOC) &&
  250. (styler.GetStartSegment() == static_cast<Sci_PositionU>(startPos))))) {
  251. ColourTo(styler, i, state);
  252. state = SCE_C_DEFAULT;
  253. }
  254. }
  255. } else if (state == SCE_C_COMMENTLINE) {
  256. if (ch == '\r' || ch == '\n') {
  257. ColourTo(styler, i-1, state);
  258. state = SCE_C_DEFAULT;
  259. }
  260. } else if (state == SCE_C_STRING) {
  261. if (ch == '"') {
  262. ColourTo(styler, i, state);
  263. state = SCE_C_DEFAULT;
  264. }
  265. } else if (state == SCE_C_CHARACTER) {
  266. if (ch == '\'') {
  267. ColourTo(styler, i, state);
  268. state = SCE_C_DEFAULT;
  269. }
  270. }
  271. }
  272. chPrev = ch;
  273. bNewLine = bSetNewLine;
  274. if (bNewLine)
  275. {
  276. bAarea = false;
  277. }
  278. }
  279. ColourTo(styler, lengthDoc - 1, state);
  280. }
  281. static void FoldCOBOLDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[],
  282. Accessor &styler) {
  283. bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  284. Sci_PositionU endPos = startPos + length;
  285. int visibleChars = 0;
  286. Sci_Position lineCurrent = styler.GetLine(startPos);
  287. int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF;
  288. char chNext = styler[startPos];
  289. bool bNewLine = true;
  290. bool bAarea = !isspacechar(chNext);
  291. int column = 0;
  292. bool bComment = false;
  293. for (Sci_PositionU i = startPos; i < endPos; i++) {
  294. char ch = chNext;
  295. chNext = styler.SafeGetCharAt(i + 1);
  296. ++column;
  297. if (bNewLine) {
  298. column = 0;
  299. bComment = (ch == '*' || ch == '/' || ch == '?');
  300. }
  301. if (column <= 1 && !bAarea) {
  302. bAarea = !isspacechar(ch);
  303. }
  304. bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
  305. if (atEOL) {
  306. int nContainment = styler.GetLineState(lineCurrent);
  307. int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE;
  308. if (bAarea && !bComment)
  309. --lev;
  310. if (visibleChars == 0 && foldCompact)
  311. lev |= SC_FOLDLEVELWHITEFLAG;
  312. if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment)
  313. lev |= SC_FOLDLEVELHEADERFLAG;
  314. if (lev != styler.LevelAt(lineCurrent)) {
  315. styler.SetLevel(lineCurrent, lev);
  316. }
  317. if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) {
  318. // this level is at the same level or less than the previous line
  319. // therefore these is nothing for the previous header to collapse, so remove the header
  320. styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG);
  321. }
  322. levelPrev = lev;
  323. visibleChars = 0;
  324. bAarea = false;
  325. bNewLine = true;
  326. lineCurrent++;
  327. } else {
  328. bNewLine = false;
  329. }
  330. if (!isspacechar(ch))
  331. visibleChars++;
  332. }
  333. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  334. int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
  335. styler.SetLevel(lineCurrent, levelPrev | flagsNext);
  336. }
  337. static const char * const COBOLWordListDesc[] = {
  338. "A Keywords",
  339. "B Keywords",
  340. "Extended Keywords",
  341. 0
  342. };
  343. LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc);