LexAsm.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. // Scintilla source code edit control
  2. /** @file LexAsm.cxx
  3. ** Lexer for Assembler, just for the MASM syntax
  4. ** Written by The Black Horus
  5. ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
  6. ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
  7. ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
  8. **/
  9. // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
  10. // The License.txt file describes the conditions under which this software may be distributed.
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include <stdio.h>
  14. #include <stdarg.h>
  15. #include <assert.h>
  16. #include <ctype.h>
  17. #include <string>
  18. #include <map>
  19. #include <set>
  20. #include "ILexer.h"
  21. #include "Scintilla.h"
  22. #include "SciLexer.h"
  23. #include "WordList.h"
  24. #include "LexAccessor.h"
  25. #include "StyleContext.h"
  26. #include "CharacterSet.h"
  27. #include "LexerModule.h"
  28. #include "OptionSet.h"
  29. #ifdef SCI_NAMESPACE
  30. using namespace Scintilla;
  31. #endif
  32. static inline bool IsAWordChar(const int ch) {
  33. return (ch < 0x80) && (isalnum(ch) || ch == '.' ||
  34. ch == '_' || ch == '?');
  35. }
  36. static inline bool IsAWordStart(const int ch) {
  37. return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.' ||
  38. ch == '%' || ch == '@' || ch == '$' || ch == '?');
  39. }
  40. static inline bool IsAsmOperator(const int ch) {
  41. if ((ch < 0x80) && (isalnum(ch)))
  42. return false;
  43. // '.' left out as it is used to make up numbers
  44. if (ch == '*' || ch == '/' || ch == '-' || ch == '+' ||
  45. ch == '(' || ch == ')' || ch == '=' || ch == '^' ||
  46. ch == '[' || ch == ']' || ch == '<' || ch == '&' ||
  47. ch == '>' || ch == ',' || ch == '|' || ch == '~' ||
  48. ch == '%' || ch == ':')
  49. return true;
  50. return false;
  51. }
  52. static bool IsStreamCommentStyle(int style) {
  53. return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
  54. }
  55. static inline int LowerCase(int c) {
  56. if (c >= 'A' && c <= 'Z')
  57. return 'a' + c - 'A';
  58. return c;
  59. }
  60. // An individual named option for use in an OptionSet
  61. // Options used for LexerAsm
  62. struct OptionsAsm {
  63. std::string delimiter;
  64. bool fold;
  65. bool foldSyntaxBased;
  66. bool foldCommentMultiline;
  67. bool foldCommentExplicit;
  68. std::string foldExplicitStart;
  69. std::string foldExplicitEnd;
  70. bool foldExplicitAnywhere;
  71. bool foldCompact;
  72. OptionsAsm() {
  73. delimiter = "";
  74. fold = false;
  75. foldSyntaxBased = true;
  76. foldCommentMultiline = false;
  77. foldCommentExplicit = false;
  78. foldExplicitStart = "";
  79. foldExplicitEnd = "";
  80. foldExplicitAnywhere = false;
  81. foldCompact = true;
  82. }
  83. };
  84. static const char * const asmWordListDesc[] = {
  85. "CPU instructions",
  86. "FPU instructions",
  87. "Registers",
  88. "Directives",
  89. "Directive operands",
  90. "Extended instructions",
  91. "Directives4Foldstart",
  92. "Directives4Foldend",
  93. 0
  94. };
  95. struct OptionSetAsm : public OptionSet<OptionsAsm> {
  96. OptionSetAsm() {
  97. DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
  98. "Character used for COMMENT directive's delimiter, replacing the standard \"~\".");
  99. DefineProperty("fold", &OptionsAsm::fold);
  100. DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
  101. "Set this property to 0 to disable syntax based folding.");
  102. DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
  103. "Set this property to 1 to enable folding multi-line comments.");
  104. DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
  105. "This option enables folding explicit fold points when using the Asm lexer. "
  106. "Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
  107. "at the end of a section that should fold.");
  108. DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
  109. "The string to use for explicit fold start points, replacing the standard ;{.");
  110. DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
  111. "The string to use for explicit fold end points, replacing the standard ;}.");
  112. DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
  113. "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
  114. DefineProperty("fold.compact", &OptionsAsm::foldCompact);
  115. DefineWordListSets(asmWordListDesc);
  116. }
  117. };
  118. class LexerAsm : public ILexer {
  119. WordList cpuInstruction;
  120. WordList mathInstruction;
  121. WordList registers;
  122. WordList directive;
  123. WordList directiveOperand;
  124. WordList extInstruction;
  125. WordList directives4foldstart;
  126. WordList directives4foldend;
  127. OptionsAsm options;
  128. OptionSetAsm osAsm;
  129. int commentChar;
  130. public:
  131. LexerAsm(int commentChar_) {
  132. commentChar = commentChar_;
  133. }
  134. virtual ~LexerAsm() {
  135. }
  136. void SCI_METHOD Release() {
  137. delete this;
  138. }
  139. int SCI_METHOD Version() const {
  140. return lvOriginal;
  141. }
  142. const char * SCI_METHOD PropertyNames() {
  143. return osAsm.PropertyNames();
  144. }
  145. int SCI_METHOD PropertyType(const char *name) {
  146. return osAsm.PropertyType(name);
  147. }
  148. const char * SCI_METHOD DescribeProperty(const char *name) {
  149. return osAsm.DescribeProperty(name);
  150. }
  151. Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
  152. const char * SCI_METHOD DescribeWordListSets() {
  153. return osAsm.DescribeWordListSets();
  154. }
  155. Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
  156. void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  157. void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  158. void * SCI_METHOD PrivateCall(int, void *) {
  159. return 0;
  160. }
  161. static ILexer *LexerFactoryAsm() {
  162. return new LexerAsm(';');
  163. }
  164. static ILexer *LexerFactoryAs() {
  165. return new LexerAsm('#');
  166. }
  167. };
  168. Sci_Position SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
  169. if (osAsm.PropertySet(&options, key, val)) {
  170. return 0;
  171. }
  172. return -1;
  173. }
  174. Sci_Position SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
  175. WordList *wordListN = 0;
  176. switch (n) {
  177. case 0:
  178. wordListN = &cpuInstruction;
  179. break;
  180. case 1:
  181. wordListN = &mathInstruction;
  182. break;
  183. case 2:
  184. wordListN = &registers;
  185. break;
  186. case 3:
  187. wordListN = &directive;
  188. break;
  189. case 4:
  190. wordListN = &directiveOperand;
  191. break;
  192. case 5:
  193. wordListN = &extInstruction;
  194. break;
  195. case 6:
  196. wordListN = &directives4foldstart;
  197. break;
  198. case 7:
  199. wordListN = &directives4foldend;
  200. break;
  201. }
  202. Sci_Position firstModification = -1;
  203. if (wordListN) {
  204. WordList wlNew;
  205. wlNew.Set(wl);
  206. if (*wordListN != wlNew) {
  207. wordListN->Set(wl);
  208. firstModification = 0;
  209. }
  210. }
  211. return firstModification;
  212. }
  213. void SCI_METHOD LexerAsm::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
  214. LexAccessor styler(pAccess);
  215. // Do not leak onto next line
  216. if (initStyle == SCE_ASM_STRINGEOL)
  217. initStyle = SCE_ASM_DEFAULT;
  218. StyleContext sc(startPos, length, initStyle, styler);
  219. for (; sc.More(); sc.Forward())
  220. {
  221. // Prevent SCE_ASM_STRINGEOL from leaking back to previous line
  222. if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
  223. sc.SetState(SCE_ASM_STRING);
  224. } else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
  225. sc.SetState(SCE_ASM_CHARACTER);
  226. }
  227. // Handle line continuation generically.
  228. if (sc.ch == '\\') {
  229. if (sc.chNext == '\n' || sc.chNext == '\r') {
  230. sc.Forward();
  231. if (sc.ch == '\r' && sc.chNext == '\n') {
  232. sc.Forward();
  233. }
  234. continue;
  235. }
  236. }
  237. // Determine if the current state should terminate.
  238. if (sc.state == SCE_ASM_OPERATOR) {
  239. if (!IsAsmOperator(sc.ch)) {
  240. sc.SetState(SCE_ASM_DEFAULT);
  241. }
  242. } else if (sc.state == SCE_ASM_NUMBER) {
  243. if (!IsAWordChar(sc.ch)) {
  244. sc.SetState(SCE_ASM_DEFAULT);
  245. }
  246. } else if (sc.state == SCE_ASM_IDENTIFIER) {
  247. if (!IsAWordChar(sc.ch) ) {
  248. char s[100];
  249. sc.GetCurrentLowered(s, sizeof(s));
  250. bool IsDirective = false;
  251. if (cpuInstruction.InList(s)) {
  252. sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
  253. } else if (mathInstruction.InList(s)) {
  254. sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
  255. } else if (registers.InList(s)) {
  256. sc.ChangeState(SCE_ASM_REGISTER);
  257. } else if (directive.InList(s)) {
  258. sc.ChangeState(SCE_ASM_DIRECTIVE);
  259. IsDirective = true;
  260. } else if (directiveOperand.InList(s)) {
  261. sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
  262. } else if (extInstruction.InList(s)) {
  263. sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
  264. }
  265. sc.SetState(SCE_ASM_DEFAULT);
  266. if (IsDirective && !strcmp(s, "comment")) {
  267. char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
  268. while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
  269. sc.ForwardSetState(SCE_ASM_DEFAULT);
  270. }
  271. if (sc.ch == delimiter) {
  272. sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
  273. }
  274. }
  275. }
  276. } else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
  277. char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
  278. if (sc.ch == delimiter) {
  279. while (!sc.atLineEnd) {
  280. sc.Forward();
  281. }
  282. sc.SetState(SCE_ASM_DEFAULT);
  283. }
  284. } else if (sc.state == SCE_ASM_COMMENT ) {
  285. if (sc.atLineEnd) {
  286. sc.SetState(SCE_ASM_DEFAULT);
  287. }
  288. } else if (sc.state == SCE_ASM_STRING) {
  289. if (sc.ch == '\\') {
  290. if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
  291. sc.Forward();
  292. }
  293. } else if (sc.ch == '\"') {
  294. sc.ForwardSetState(SCE_ASM_DEFAULT);
  295. } else if (sc.atLineEnd) {
  296. sc.ChangeState(SCE_ASM_STRINGEOL);
  297. sc.ForwardSetState(SCE_ASM_DEFAULT);
  298. }
  299. } else if (sc.state == SCE_ASM_CHARACTER) {
  300. if (sc.ch == '\\') {
  301. if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
  302. sc.Forward();
  303. }
  304. } else if (sc.ch == '\'') {
  305. sc.ForwardSetState(SCE_ASM_DEFAULT);
  306. } else if (sc.atLineEnd) {
  307. sc.ChangeState(SCE_ASM_STRINGEOL);
  308. sc.ForwardSetState(SCE_ASM_DEFAULT);
  309. }
  310. }
  311. // Determine if a new state should be entered.
  312. if (sc.state == SCE_ASM_DEFAULT) {
  313. if (sc.ch == commentChar){
  314. sc.SetState(SCE_ASM_COMMENT);
  315. } else if (IsASCII(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && IsASCII(sc.chNext) && isdigit(sc.chNext)))) {
  316. sc.SetState(SCE_ASM_NUMBER);
  317. } else if (IsAWordStart(sc.ch)) {
  318. sc.SetState(SCE_ASM_IDENTIFIER);
  319. } else if (sc.ch == '\"') {
  320. sc.SetState(SCE_ASM_STRING);
  321. } else if (sc.ch == '\'') {
  322. sc.SetState(SCE_ASM_CHARACTER);
  323. } else if (IsAsmOperator(sc.ch)) {
  324. sc.SetState(SCE_ASM_OPERATOR);
  325. }
  326. }
  327. }
  328. sc.Complete();
  329. }
  330. // Store both the current line's fold level and the next lines in the
  331. // level store to make it easy to pick up with each increment
  332. // and to make it possible to fiddle the current level for "else".
  333. void SCI_METHOD LexerAsm::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
  334. if (!options.fold)
  335. return;
  336. LexAccessor styler(pAccess);
  337. Sci_PositionU endPos = startPos + length;
  338. int visibleChars = 0;
  339. Sci_Position lineCurrent = styler.GetLine(startPos);
  340. int levelCurrent = SC_FOLDLEVELBASE;
  341. if (lineCurrent > 0)
  342. levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
  343. int levelNext = levelCurrent;
  344. char chNext = styler[startPos];
  345. int styleNext = styler.StyleAt(startPos);
  346. int style = initStyle;
  347. char word[100];
  348. int wordlen = 0;
  349. const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
  350. for (Sci_PositionU i = startPos; i < endPos; i++) {
  351. char ch = chNext;
  352. chNext = styler.SafeGetCharAt(i + 1);
  353. int stylePrev = style;
  354. style = styleNext;
  355. styleNext = styler.StyleAt(i + 1);
  356. bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
  357. if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
  358. if (!IsStreamCommentStyle(stylePrev)) {
  359. levelNext++;
  360. } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
  361. // Comments don't end at end of line and the next character may be unstyled.
  362. levelNext--;
  363. }
  364. }
  365. if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
  366. if (userDefinedFoldMarkers) {
  367. if (styler.Match(i, options.foldExplicitStart.c_str())) {
  368. levelNext++;
  369. } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
  370. levelNext--;
  371. }
  372. } else {
  373. if (ch == ';') {
  374. if (chNext == '{') {
  375. levelNext++;
  376. } else if (chNext == '}') {
  377. levelNext--;
  378. }
  379. }
  380. }
  381. }
  382. if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
  383. word[wordlen++] = static_cast<char>(LowerCase(ch));
  384. if (wordlen == 100) { // prevent overflow
  385. word[0] = '\0';
  386. wordlen = 1;
  387. }
  388. if (styleNext != SCE_ASM_DIRECTIVE) { // reading directive ready
  389. word[wordlen] = '\0';
  390. wordlen = 0;
  391. if (directives4foldstart.InList(word)) {
  392. levelNext++;
  393. } else if (directives4foldend.InList(word)){
  394. levelNext--;
  395. }
  396. }
  397. }
  398. if (!IsASpace(ch))
  399. visibleChars++;
  400. if (atEOL || (i == endPos-1)) {
  401. int levelUse = levelCurrent;
  402. int lev = levelUse | levelNext << 16;
  403. if (visibleChars == 0 && options.foldCompact)
  404. lev |= SC_FOLDLEVELWHITEFLAG;
  405. if (levelUse < levelNext)
  406. lev |= SC_FOLDLEVELHEADERFLAG;
  407. if (lev != styler.LevelAt(lineCurrent)) {
  408. styler.SetLevel(lineCurrent, lev);
  409. }
  410. lineCurrent++;
  411. levelCurrent = levelNext;
  412. if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length() - 1))) {
  413. // There is an empty line at end of file so give it same level and empty
  414. styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
  415. }
  416. visibleChars = 0;
  417. }
  418. }
  419. }
  420. LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
  421. LexerModule lmAs(SCLEX_AS, LexerAsm::LexerFactoryAs, "as", asmWordListDesc);