LexA68k.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. // Scintilla source code edit control
  2. /** @file LexA68k.cxx
  3. ** Lexer for Assembler, just for the MASM syntax
  4. ** Written by Martial Demolins AKA Folco
  5. **/
  6. // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
  7. // The License.txt file describes the conditions under which this software
  8. // may be distributed.
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <stdio.h>
  12. #include <stdarg.h>
  13. #include <assert.h>
  14. #include <ctype.h>
  15. #include "ILexer.h"
  16. #include "Scintilla.h"
  17. #include "SciLexer.h"
  18. #include "WordList.h"
  19. #include "LexAccessor.h"
  20. #include "Accessor.h"
  21. #include "StyleContext.h"
  22. #include "CharacterSet.h"
  23. #include "LexerModule.h"
  24. #ifdef SCI_NAMESPACE
  25. using namespace Scintilla;
  26. #endif
  27. // Return values for GetOperatorType
  28. #define NO_OPERATOR 0
  29. #define OPERATOR_1CHAR 1
  30. #define OPERATOR_2CHAR 2
  31. /**
  32. * IsIdentifierStart
  33. *
  34. * Return true if the given char is a valid identifier first char
  35. */
  36. static inline bool IsIdentifierStart (const int ch)
  37. {
  38. return (isalpha(ch) || (ch == '_') || (ch == '\\'));
  39. }
  40. /**
  41. * IsIdentifierChar
  42. *
  43. * Return true if the given char is a valid identifier char
  44. */
  45. static inline bool IsIdentifierChar (const int ch)
  46. {
  47. return (isalnum(ch) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'));
  48. }
  49. /**
  50. * GetOperatorType
  51. *
  52. * Return:
  53. * NO_OPERATOR if char is not an operator
  54. * OPERATOR_1CHAR if the operator is one char long
  55. * OPERATOR_2CHAR if the operator is two chars long
  56. */
  57. static inline int GetOperatorType (const int ch1, const int ch2)
  58. {
  59. int OpType = NO_OPERATOR;
  60. if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
  61. (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
  62. OpType = OPERATOR_1CHAR;
  63. else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
  64. OpType = OPERATOR_2CHAR;
  65. return OpType;
  66. }
  67. /**
  68. * IsBin
  69. *
  70. * Return true if the given char is 0 or 1
  71. */
  72. static inline bool IsBin (const int ch)
  73. {
  74. return (ch == '0') || (ch == '1');
  75. }
  76. /**
  77. * IsDoxygenChar
  78. *
  79. * Return true if the char may be part of a Doxygen keyword
  80. */
  81. static inline bool IsDoxygenChar (const int ch)
  82. {
  83. return isalpha(ch) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
  84. }
  85. /**
  86. * ColouriseA68kDoc
  87. *
  88. * Main function, which colourises a 68k source
  89. */
  90. static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler)
  91. {
  92. // Used to buffer a string, to be able to compare it using built-in functions
  93. char Buffer[100];
  94. // Used to know the length of an operator
  95. int OpType;
  96. // Get references to keywords lists
  97. WordList &cpuInstruction = *keywordlists[0];
  98. WordList &registers = *keywordlists[1];
  99. WordList &directive = *keywordlists[2];
  100. WordList &extInstruction = *keywordlists[3];
  101. WordList &alert = *keywordlists[4];
  102. WordList &doxygenKeyword = *keywordlists[5];
  103. // Instanciate a context for our source
  104. StyleContext sc(startPos, length, initStyle, styler);
  105. /************************************************************
  106. *
  107. * Parse the source
  108. *
  109. ************************************************************/
  110. for ( ; sc.More(); sc.Forward())
  111. {
  112. /************************************************************
  113. *
  114. * A style always terminates at the end of a line, even for
  115. * comments (no multi-lines comments)
  116. *
  117. ************************************************************/
  118. if (sc.atLineStart) {
  119. sc.SetState(SCE_A68K_DEFAULT);
  120. }
  121. /************************************************************
  122. *
  123. * If we are not in "default style", check if the style continues
  124. * In this case, we just have to loop
  125. *
  126. ************************************************************/
  127. if (sc.state != SCE_A68K_DEFAULT)
  128. {
  129. if ( ((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number
  130. || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number
  131. || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number
  132. || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument
  133. || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted
  134. || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted
  135. || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch)) // Macro declaration (or global label, we don't know at this point)
  136. || ((sc.state == SCE_A68K_IDENTIFIER) && IsIdentifierChar(sc.ch)) // Identifier
  137. || ((sc.state == SCE_A68K_LABEL) && IsIdentifierChar(sc.ch)) // Label (local)
  138. || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && IsDoxygenChar(sc.ch)) // Doxygen keyword
  139. || ((sc.state == SCE_A68K_COMMENT_SPECIAL) && isalpha(sc.ch)) // Alert
  140. || ((sc.state == SCE_A68K_COMMENT) && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment
  141. {
  142. continue;
  143. }
  144. /************************************************************
  145. *
  146. * Check if current state terminates
  147. *
  148. ************************************************************/
  149. // Strings: include terminal ' or " in the current string by skipping it
  150. if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) {
  151. sc.Forward();
  152. }
  153. // If a macro declaration was terminated with ':', it was a label
  154. else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) {
  155. sc.ChangeState(SCE_A68K_LABEL);
  156. }
  157. // If it wasn't a Doxygen keyword, change it to normal comment
  158. else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) {
  159. sc.GetCurrent(Buffer, sizeof(Buffer));
  160. if (!doxygenKeyword.InList(Buffer)) {
  161. sc.ChangeState(SCE_A68K_COMMENT);
  162. }
  163. sc.SetState(SCE_A68K_COMMENT);
  164. continue;
  165. }
  166. // If it wasn't an Alert, change it to normal comment
  167. else if (sc.state == SCE_A68K_COMMENT_SPECIAL) {
  168. sc.GetCurrent(Buffer, sizeof(Buffer));
  169. if (!alert.InList(Buffer)) {
  170. sc.ChangeState(SCE_A68K_COMMENT);
  171. }
  172. // Reset style to normal comment, or to Doxygen keyword if it begins with '\'
  173. if (sc.ch == '\\') {
  174. sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
  175. }
  176. else {
  177. sc.SetState(SCE_A68K_COMMENT);
  178. }
  179. continue;
  180. }
  181. // If we are in a comment, it's a Doxygen keyword or an Alert
  182. else if (sc.state == SCE_A68K_COMMENT) {
  183. if (sc.ch == '\\') {
  184. sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
  185. }
  186. else {
  187. sc.SetState(SCE_A68K_COMMENT_SPECIAL);
  188. }
  189. continue;
  190. }
  191. // Check if we are at the end of an identifier
  192. // In this case, colourise it if was a keyword.
  193. else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) {
  194. sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
  195. if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list
  196. sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
  197. }
  198. else if (extInstruction.InList(Buffer)) {
  199. sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
  200. }
  201. else if (registers.InList(Buffer)) {
  202. sc.ChangeState(SCE_A68K_REGISTER);
  203. }
  204. else if (directive.InList(Buffer)) {
  205. sc.ChangeState(SCE_A68K_DIRECTIVE);
  206. }
  207. }
  208. // All special contexts are now handled.Come back to default style
  209. sc.SetState(SCE_A68K_DEFAULT);
  210. }
  211. /************************************************************
  212. *
  213. * Check if we must enter a new state
  214. *
  215. ************************************************************/
  216. // Something which begins at the beginning of a line, and with
  217. // - '\' + an identifier start char, or
  218. // - '\\@' + an identifier start char
  219. // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration
  220. if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) {
  221. sc.SetState(SCE_A68K_LABEL);
  222. }
  223. if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) {
  224. sc.Forward(2);
  225. if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) {
  226. sc.ChangeState(SCE_A68K_LABEL);
  227. sc.SetState(SCE_A68K_LABEL);
  228. }
  229. }
  230. // Label and macro identifiers start at the beginning of a line
  231. // We set both as a macro id, but if it wasn't one (':' at the end),
  232. // it will be changed as a label.
  233. if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
  234. sc.SetState(SCE_A68K_MACRO_DECLARATION);
  235. }
  236. else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Default: alert in a comment. If it doesn't match
  237. sc.SetState(SCE_A68K_COMMENT); // with an alert, it will be toggle to a normal comment
  238. }
  239. else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix
  240. sc.SetState(SCE_A68K_NUMBER_DEC);
  241. }
  242. else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%'
  243. sc.SetState(SCE_A68K_NUMBER_BIN);
  244. }
  245. else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$'
  246. sc.SetState(SCE_A68K_NUMBER_HEX);
  247. }
  248. else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted)
  249. sc.SetState(SCE_A68K_STRING1);
  250. }
  251. else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted)
  252. sc.SetState(SCE_A68K_STRING2);
  253. }
  254. else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro are prefixed with '\'
  255. sc.SetState(SCE_A68K_MACRO_ARG);
  256. }
  257. else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc...
  258. sc.SetState(SCE_A68K_IDENTIFIER);
  259. }
  260. else {
  261. if (sc.ch < 0x80) {
  262. OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator
  263. if (OpType != NO_OPERATOR) {
  264. sc.SetState(SCE_A68K_OPERATOR);
  265. if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long
  266. sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<)
  267. }
  268. }
  269. }
  270. }
  271. } // End of for()
  272. sc.Complete();
  273. }
  274. // Names of the keyword lists
  275. static const char * const a68kWordListDesc[] =
  276. {
  277. "CPU instructions",
  278. "Registers",
  279. "Directives",
  280. "Extended instructions",
  281. "Comment special words",
  282. "Doxygen keywords",
  283. 0
  284. };
  285. LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);