LexHaskell.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112
  1. /******************************************************************
  2. * LexHaskell.cxx
  3. *
  4. * A haskell lexer for the scintilla code control.
  5. * Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
  6. * External lexer stuff inspired from the caml external lexer.
  7. * Folder copied from Python's.
  8. *
  9. * Written by Tobias Engvall - tumm at dtek dot chalmers dot se
  10. *
  11. * Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
  12. *
  13. * Improved by kudah <kudahkukarek@gmail.com>
  14. *
  15. * TODO:
  16. * * A proper lexical folder to fold group declarations, comments, pragmas,
  17. * #ifdefs, explicit layout, lists, tuples, quasi-quotes, splices, etc, etc,
  18. * etc.
  19. *
  20. *****************************************************************/
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include <stdio.h>
  24. #include <stdarg.h>
  25. #include <assert.h>
  26. #include <ctype.h>
  27. #include <string>
  28. #include <map>
  29. #include "ILexer.h"
  30. #include "Scintilla.h"
  31. #include "SciLexer.h"
  32. #include "PropSetSimple.h"
  33. #include "WordList.h"
  34. #include "LexAccessor.h"
  35. #include "Accessor.h"
  36. #include "StyleContext.h"
  37. #include "CharacterSet.h"
  38. #include "CharacterCategory.h"
  39. #include "LexerModule.h"
  40. #include "OptionSet.h"
  41. #ifdef SCI_NAMESPACE
  42. using namespace Scintilla;
  43. #endif
  44. // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
  45. // Note, letter modifiers are prohibited.
  46. static int u_iswupper (int ch) {
  47. CharacterCategory c = CategoriseCharacter(ch);
  48. return c == ccLu || c == ccLt;
  49. }
  50. static int u_iswalpha (int ch) {
  51. CharacterCategory c = CategoriseCharacter(ch);
  52. return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
  53. }
  54. static int u_iswalnum (int ch) {
  55. CharacterCategory c = CategoriseCharacter(ch);
  56. return c == ccLl || c == ccLu || c == ccLt || c == ccLo
  57. || c == ccNd || c == ccNo;
  58. }
  59. static int u_IsHaskellSymbol(int ch) {
  60. CharacterCategory c = CategoriseCharacter(ch);
  61. return c == ccPc || c == ccPd || c == ccPo
  62. || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
  63. }
  64. static inline bool IsHaskellLetter(const int ch) {
  65. if (IsASCII(ch)) {
  66. return (ch >= 'a' && ch <= 'z')
  67. || (ch >= 'A' && ch <= 'Z');
  68. } else {
  69. return u_iswalpha(ch) != 0;
  70. }
  71. }
  72. static inline bool IsHaskellAlphaNumeric(const int ch) {
  73. if (IsASCII(ch)) {
  74. return IsAlphaNumeric(ch);
  75. } else {
  76. return u_iswalnum(ch) != 0;
  77. }
  78. }
  79. static inline bool IsHaskellUpperCase(const int ch) {
  80. if (IsASCII(ch)) {
  81. return ch >= 'A' && ch <= 'Z';
  82. } else {
  83. return u_iswupper(ch) != 0;
  84. }
  85. }
  86. static inline bool IsAnHaskellOperatorChar(const int ch) {
  87. if (IsASCII(ch)) {
  88. return
  89. ( ch == '!' || ch == '#' || ch == '$' || ch == '%'
  90. || ch == '&' || ch == '*' || ch == '+' || ch == '-'
  91. || ch == '.' || ch == '/' || ch == ':' || ch == '<'
  92. || ch == '=' || ch == '>' || ch == '?' || ch == '@'
  93. || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
  94. } else {
  95. return u_IsHaskellSymbol(ch) != 0;
  96. }
  97. }
  98. static inline bool IsAHaskellWordStart(const int ch) {
  99. return IsHaskellLetter(ch) || ch == '_';
  100. }
  101. static inline bool IsAHaskellWordChar(const int ch) {
  102. return ( IsHaskellAlphaNumeric(ch)
  103. || ch == '_'
  104. || ch == '\'');
  105. }
  106. static inline bool IsCommentBlockStyle(int style) {
  107. return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
  108. }
  109. static inline bool IsCommentStyle(int style) {
  110. return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
  111. || ( style == SCE_HA_LITERATE_COMMENT
  112. || style == SCE_HA_LITERATE_CODEDELIM);
  113. }
  114. // styles which do not belong to Haskell, but to external tools
  115. static inline bool IsExternalStyle(int style) {
  116. return ( style == SCE_HA_PREPROCESSOR
  117. || style == SCE_HA_LITERATE_COMMENT
  118. || style == SCE_HA_LITERATE_CODEDELIM);
  119. }
  120. static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
  121. return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
  122. }
  123. // Mangled version of lexlib/Accessor.cxx IndentAmount.
  124. // Modified to treat comment blocks as whitespace
  125. // plus special case for commentline/preprocessor.
  126. static int HaskellIndentAmount(Accessor &styler, const Sci_Position line) {
  127. // Determines the indentation level of the current line
  128. // Comment blocks are treated as whitespace
  129. Sci_Position pos = styler.LineStart(line);
  130. Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
  131. char ch = styler[pos];
  132. int style = styler.StyleAt(pos);
  133. int indent = 0;
  134. bool inPrevPrefix = line > 0;
  135. Sci_Position posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
  136. while (( ch == ' ' || ch == '\t'
  137. || IsCommentBlockStyle(style)
  138. || style == SCE_HA_LITERATE_CODEDELIM)
  139. && (pos < eol_pos)) {
  140. if (inPrevPrefix) {
  141. char chPrev = styler[posPrev++];
  142. if (chPrev != ' ' && chPrev != '\t') {
  143. inPrevPrefix = false;
  144. }
  145. }
  146. if (ch == '\t') {
  147. indent = (indent / 8 + 1) * 8;
  148. } else { // Space or comment block
  149. indent++;
  150. }
  151. pos++;
  152. ch = styler[pos];
  153. style = styler.StyleAt(pos);
  154. }
  155. indent += SC_FOLDLEVELBASE;
  156. // if completely empty line or the start of a comment or preprocessor...
  157. if ( styler.LineStart(line) == styler.Length()
  158. || ch == ' '
  159. || ch == '\t'
  160. || ch == '\n'
  161. || ch == '\r'
  162. || IsCommentStyle(style)
  163. || style == SCE_HA_PREPROCESSOR)
  164. return indent | SC_FOLDLEVELWHITEFLAG;
  165. else
  166. return indent;
  167. }
  // Lexer and folder settings for Haskell, together with their defaults.
  struct OptionsHaskell {
     bool magicHash;
     bool allowQuotes;
     bool implicitParams;
     bool highlightSafe;
     bool cpp;
     bool stylingWithinPreprocessor;
     bool fold;
     bool foldComment;
     bool foldCompact;
     bool foldImports;

     OptionsHaskell()
        : magicHash(true)                  // Widespread use, enabled by default.
        , allowQuotes(true)                // Widespread use, enabled by default.
        , implicitParams(false)            // Fell out of favor, seldom used, disabled.
        , highlightSafe(true)              // Moderately used, doesn't hurt to enable.
        , cpp(true)                        // Widespread use, enabled by default.
        , stylingWithinPreprocessor(false)
        , fold(false)
        , foldComment(false)
        , foldCompact(false)
        , foldImports(false)
     {}
  };
  192. static const char * const haskellWordListDesc[] = {
  193. "Keywords",
  194. "FFI",
  195. "Reserved operators",
  196. 0
  197. };
  198. struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
  199. OptionSetHaskell() {
  200. DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
  201. "Set to 0 to disallow the '#' character at the end of identifiers and "
  202. "literals with the haskell lexer "
  203. "(GHC -XMagicHash extension)");
  204. DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
  205. "Set to 0 to disable highlighting of Template Haskell name quotations "
  206. "and promoted constructors "
  207. "(GHC -XTemplateHaskell and -XDataKinds extensions)");
  208. DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
  209. "Set to 1 to allow the '?' character at the start of identifiers "
  210. "with the haskell lexer "
  211. "(GHC & Hugs -XImplicitParams extension)");
  212. DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
  213. "Set to 0 to disallow \"safe\" keyword in imports "
  214. "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
  215. DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
  216. "Set to 0 to disable C-preprocessor highlighting "
  217. "(-XCPP extension)");
  218. DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
  219. "For Haskell code, determines whether all preprocessor code is styled in the "
  220. "preprocessor style (0, the default) or only from the initial # to the end "
  221. "of the command word(1)."
  222. );
  223. DefineProperty("fold", &OptionsHaskell::fold);
  224. DefineProperty("fold.comment", &OptionsHaskell::foldComment);
  225. DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
  226. DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
  227. "Set to 1 to enable folding of import declarations");
  228. DefineWordListSets(haskellWordListDesc);
  229. }
  230. };
  231. class LexerHaskell : public ILexer {
  232. bool literate;
  233. Sci_Position firstImportLine;
  234. int firstImportIndent;
  235. WordList keywords;
  236. WordList ffi;
  237. WordList reserved_operators;
  238. OptionsHaskell options;
  239. OptionSetHaskell osHaskell;
  240. enum HashCount {
  241. oneHash
  242. ,twoHashes
  243. ,unlimitedHashes
  244. };
  245. enum KeywordMode {
  246. HA_MODE_DEFAULT = 0
  247. ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
  248. ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
  249. ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
  250. ,HA_MODE_MODULE = 4 // after "module", before module name.
  251. ,HA_MODE_FFI = 5 // after "foreign", before FFI keywords
  252. ,HA_MODE_TYPE = 6 // after "type" or "data", before "family"
  253. };
  254. enum LiterateMode {
  255. LITERATE_BIRD = 0 // if '>' is the first character on the line,
  256. // color '>' as a codedelim and the rest of
  257. // the line as code.
  258. // else if "\begin{code}" is the only word on the
  259. // line except whitespace, switch to LITERATE_BLOCK
  260. // otherwise color the line as a literate comment.
  261. ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
  262. // 0 ignoring all later characters, color the line
  263. // as a codedelim and switch to LITERATE_BIRD
  264. // otherwise color the line as code.
  265. };
  266. struct HaskellLineInfo {
  267. unsigned int nestLevel; // 22 bits ought to be enough for anybody
  268. unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
  269. // beyond 31.
  270. bool pragma;
  271. LiterateMode lmode;
  272. KeywordMode mode;
  273. HaskellLineInfo(int state) :
  274. nestLevel (state >> 10)
  275. , nonexternalStyle ((state >> 5) & 0x1F)
  276. , pragma ((state >> 4) & 0x1)
  277. , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
  278. , mode (static_cast<KeywordMode>(state & 0x7))
  279. {}
  280. int ToLineState() {
  281. return
  282. (nestLevel << 10)
  283. | (nonexternalStyle << 5)
  284. | (pragma << 4)
  285. | (lmode << 3)
  286. | mode;
  287. }
  288. };
  289. inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
  290. if (options.magicHash && sc.ch == '#') {
  291. sc.Forward();
  292. if (hashes == twoHashes && sc.ch == '#') {
  293. sc.Forward();
  294. } else if (hashes == unlimitedHashes) {
  295. while (sc.ch == '#') {
  296. sc.Forward();
  297. }
  298. }
  299. }
  300. }
  301. bool LineContainsImport(const Sci_Position line, Accessor &styler) const {
  302. if (options.foldImports) {
  303. Sci_Position currentPos = styler.LineStart(line);
  304. int style = styler.StyleAt(currentPos);
  305. Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
  306. while (currentPos < eol_pos) {
  307. int ch = styler[currentPos];
  308. style = styler.StyleAt(currentPos);
  309. if (ch == ' ' || ch == '\t'
  310. || IsCommentBlockStyle(style)
  311. || style == SCE_HA_LITERATE_CODEDELIM) {
  312. currentPos++;
  313. } else {
  314. break;
  315. }
  316. }
  317. return (style == SCE_HA_KEYWORD
  318. && styler.Match(currentPos, "import"));
  319. } else {
  320. return false;
  321. }
  322. }
  323. inline int IndentAmountWithOffset(Accessor &styler, const Sci_Position line) const {
  324. const int indent = HaskellIndentAmount(styler, line);
  325. const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
  326. return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
  327. ? indent
  328. : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
  329. }
  330. inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
  331. return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
  332. ? indentLevel
  333. : indentLevel - firstImportIndent;
  334. }
  335. public:
  336. LexerHaskell(bool literate_)
  337. : literate(literate_)
  338. , firstImportLine(-1)
  339. , firstImportIndent(0)
  340. {}
  341. virtual ~LexerHaskell() {}
  342. void SCI_METHOD Release() {
  343. delete this;
  344. }
  345. int SCI_METHOD Version() const {
  346. return lvOriginal;
  347. }
  348. const char * SCI_METHOD PropertyNames() {
  349. return osHaskell.PropertyNames();
  350. }
  351. int SCI_METHOD PropertyType(const char *name) {
  352. return osHaskell.PropertyType(name);
  353. }
  354. const char * SCI_METHOD DescribeProperty(const char *name) {
  355. return osHaskell.DescribeProperty(name);
  356. }
  357. Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
  358. const char * SCI_METHOD DescribeWordListSets() {
  359. return osHaskell.DescribeWordListSets();
  360. }
  361. Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
  362. void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  363. void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  364. void * SCI_METHOD PrivateCall(int, void *) {
  365. return 0;
  366. }
  367. static ILexer *LexerFactoryHaskell() {
  368. return new LexerHaskell(false);
  369. }
  370. static ILexer *LexerFactoryLiterateHaskell() {
  371. return new LexerHaskell(true);
  372. }
  373. };
  374. Sci_Position SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
  375. if (osHaskell.PropertySet(&options, key, val)) {
  376. return 0;
  377. }
  378. return -1;
  379. }
  380. Sci_Position SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
  381. WordList *wordListN = 0;
  382. switch (n) {
  383. case 0:
  384. wordListN = &keywords;
  385. break;
  386. case 1:
  387. wordListN = &ffi;
  388. break;
  389. case 2:
  390. wordListN = &reserved_operators;
  391. break;
  392. }
  393. Sci_Position firstModification = -1;
  394. if (wordListN) {
  395. WordList wlNew;
  396. wlNew.Set(wl);
  397. if (*wordListN != wlNew) {
  398. wordListN->Set(wl);
  399. firstModification = 0;
  400. }
  401. }
  402. return firstModification;
  403. }
  404. void SCI_METHOD LexerHaskell::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle
  405. ,IDocument *pAccess) {
  406. LexAccessor styler(pAccess);
  407. Sci_Position lineCurrent = styler.GetLine(startPos);
  408. HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
  409. // Do not leak onto next line
  410. if (initStyle == SCE_HA_STRINGEOL)
  411. initStyle = SCE_HA_DEFAULT;
  412. else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
  413. initStyle = hs.nonexternalStyle;
  414. StyleContext sc(startPos, length, initStyle, styler);
  415. int base = 10;
  416. bool dot = false;
  417. bool inDashes = false;
  418. bool alreadyInTheMiddleOfOperator = false;
  419. assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
  420. while (sc.More()) {
  421. // Check for state end
  422. if (!IsExternalStyle(sc.state)) {
  423. hs.nonexternalStyle = sc.state;
  424. }
  425. // For lexer to work, states should unconditionally forward at least one
  426. // character.
  427. // If they don't, they should still check if they are at line end and
  428. // forward if so.
  429. // If a state forwards more than one character, it should check every time
  430. // that it is not a line end and cease forwarding otherwise.
  431. if (sc.atLineEnd) {
  432. // Remember the line state for future incremental lexing
  433. styler.SetLineState(lineCurrent, hs.ToLineState());
  434. lineCurrent++;
  435. }
  436. // Handle line continuation generically.
  437. if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
  438. && ( sc.state == SCE_HA_STRING
  439. || sc.state == SCE_HA_PREPROCESSOR)) {
  440. // Remember the line state for future incremental lexing
  441. styler.SetLineState(lineCurrent, hs.ToLineState());
  442. lineCurrent++;
  443. sc.Forward();
  444. if (sc.ch == '\r' && sc.chNext == '\n') {
  445. sc.Forward();
  446. }
  447. sc.Forward();
  448. continue;
  449. }
  450. if (sc.atLineStart) {
  451. if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
  452. // Prevent SCE_HA_STRINGEOL from leaking back to previous line
  453. sc.SetState(sc.state);
  454. }
  455. if (literate && hs.lmode == LITERATE_BIRD) {
  456. if (!IsExternalStyle(sc.state)) {
  457. sc.SetState(SCE_HA_LITERATE_COMMENT);
  458. }
  459. }
  460. }
  461. // External
  462. // Literate
  463. if ( literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
  464. && sc.ch == '>') {
  465. sc.SetState(SCE_HA_LITERATE_CODEDELIM);
  466. sc.ForwardSetState(hs.nonexternalStyle);
  467. }
  468. else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
  469. && ( sc.ch == ' ' || sc.ch == '\t'
  470. || sc.Match("\\begin{code}"))) {
  471. sc.SetState(sc.state);
  472. while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
  473. sc.Forward();
  474. if (sc.Match("\\begin{code}")) {
  475. sc.Forward(static_cast<int>(strlen("\\begin{code}")));
  476. bool correct = true;
  477. while (!sc.atLineEnd && sc.More()) {
  478. if (sc.ch != ' ' && sc.ch != '\t') {
  479. correct = false;
  480. }
  481. sc.Forward();
  482. }
  483. if (correct) {
  484. sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
  485. hs.lmode = LITERATE_BLOCK;
  486. }
  487. }
  488. }
  489. else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
  490. && sc.Match("\\end{code}")) {
  491. sc.SetState(SCE_HA_LITERATE_CODEDELIM);
  492. sc.Forward(static_cast<int>(strlen("\\end{code}")));
  493. while (!sc.atLineEnd && sc.More()) {
  494. sc.Forward();
  495. }
  496. sc.SetState(SCE_HA_LITERATE_COMMENT);
  497. hs.lmode = LITERATE_BIRD;
  498. }
  499. // Preprocessor
  500. else if (sc.atLineStart && sc.ch == '#' && options.cpp
  501. && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
  502. sc.SetState(SCE_HA_PREPROCESSOR);
  503. sc.Forward();
  504. }
  505. // Literate
  506. else if (sc.state == SCE_HA_LITERATE_COMMENT) {
  507. sc.Forward();
  508. }
  509. else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
  510. sc.ForwardSetState(hs.nonexternalStyle);
  511. }
  512. // Preprocessor
  513. else if (sc.state == SCE_HA_PREPROCESSOR) {
  514. if (sc.atLineEnd) {
  515. sc.SetState(options.stylingWithinPreprocessor
  516. ? SCE_HA_DEFAULT
  517. : hs.nonexternalStyle);
  518. sc.Forward(); // prevent double counting a line
  519. } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
  520. sc.SetState(SCE_HA_DEFAULT);
  521. } else {
  522. sc.Forward();
  523. }
  524. }
  525. // Haskell
  526. // Operator
  527. else if (sc.state == SCE_HA_OPERATOR) {
  528. int style = SCE_HA_OPERATOR;
  529. if ( sc.ch == ':'
  530. && !alreadyInTheMiddleOfOperator
  531. // except "::"
  532. && !( sc.chNext == ':'
  533. && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
  534. style = SCE_HA_CAPITAL;
  535. }
  536. alreadyInTheMiddleOfOperator = false;
  537. while (IsAnHaskellOperatorChar(sc.ch))
  538. sc.Forward();
  539. char s[100];
  540. sc.GetCurrent(s, sizeof(s));
  541. if (reserved_operators.InList(s))
  542. style = SCE_HA_RESERVED_OPERATOR;
  543. sc.ChangeState(style);
  544. sc.SetState(SCE_HA_DEFAULT);
  545. }
  546. // String
  547. else if (sc.state == SCE_HA_STRING) {
  548. if (sc.atLineEnd) {
  549. sc.ChangeState(SCE_HA_STRINGEOL);
  550. sc.ForwardSetState(SCE_HA_DEFAULT);
  551. } else if (sc.ch == '\"') {
  552. sc.Forward();
  553. skipMagicHash(sc, oneHash);
  554. sc.SetState(SCE_HA_DEFAULT);
  555. } else if (sc.ch == '\\') {
  556. sc.Forward(2);
  557. } else {
  558. sc.Forward();
  559. }
  560. }
  561. // Char
  562. else if (sc.state == SCE_HA_CHARACTER) {
  563. if (sc.atLineEnd) {
  564. sc.ChangeState(SCE_HA_STRINGEOL);
  565. sc.ForwardSetState(SCE_HA_DEFAULT);
  566. } else if (sc.ch == '\'') {
  567. sc.Forward();
  568. skipMagicHash(sc, oneHash);
  569. sc.SetState(SCE_HA_DEFAULT);
  570. } else if (sc.ch == '\\') {
  571. sc.Forward(2);
  572. } else {
  573. sc.Forward();
  574. }
  575. }
  576. // Number
  577. else if (sc.state == SCE_HA_NUMBER) {
  578. if (sc.atLineEnd) {
  579. sc.SetState(SCE_HA_DEFAULT);
  580. sc.Forward(); // prevent double counting a line
  581. } else if (IsADigit(sc.ch, base)) {
  582. sc.Forward();
  583. } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
  584. sc.Forward(2);
  585. dot = false;
  586. } else if ((base == 10) &&
  587. (sc.ch == 'e' || sc.ch == 'E') &&
  588. (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
  589. sc.Forward();
  590. if (sc.ch == '+' || sc.ch == '-')
  591. sc.Forward();
  592. } else {
  593. skipMagicHash(sc, twoHashes);
  594. sc.SetState(SCE_HA_DEFAULT);
  595. }
  596. }
  597. // Keyword or Identifier
  598. else if (sc.state == SCE_HA_IDENTIFIER) {
  599. int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
  600. assert(IsAHaskellWordStart(sc.ch));
  601. sc.Forward();
  602. while (sc.More()) {
  603. if (IsAHaskellWordChar(sc.ch)) {
  604. sc.Forward();
  605. } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
  606. if (IsHaskellUpperCase(sc.chNext)) {
  607. sc.Forward();
  608. style = SCE_HA_CAPITAL;
  609. } else if (IsAHaskellWordStart(sc.chNext)) {
  610. sc.Forward();
  611. style = SCE_HA_IDENTIFIER;
  612. } else if (IsAnHaskellOperatorChar(sc.chNext)) {
  613. sc.Forward();
  614. style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
  615. while (IsAnHaskellOperatorChar(sc.ch))
  616. sc.Forward();
  617. break;
  618. } else {
  619. break;
  620. }
  621. } else {
  622. break;
  623. }
  624. }
  625. skipMagicHash(sc, unlimitedHashes);
  626. char s[100];
  627. sc.GetCurrent(s, sizeof(s));
  628. KeywordMode new_mode = HA_MODE_DEFAULT;
  629. if (keywords.InList(s)) {
  630. style = SCE_HA_KEYWORD;
  631. } else if (style == SCE_HA_CAPITAL) {
  632. if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
  633. style = SCE_HA_MODULE;
  634. new_mode = HA_MODE_IMPORT2;
  635. } else if (hs.mode == HA_MODE_MODULE) {
  636. style = SCE_HA_MODULE;
  637. }
  638. } else if (hs.mode == HA_MODE_IMPORT1 &&
  639. strcmp(s,"qualified") == 0) {
  640. style = SCE_HA_KEYWORD;
  641. new_mode = HA_MODE_IMPORT1;
  642. } else if (options.highlightSafe &&
  643. hs.mode == HA_MODE_IMPORT1 &&
  644. strcmp(s,"safe") == 0) {
  645. style = SCE_HA_KEYWORD;
  646. new_mode = HA_MODE_IMPORT1;
  647. } else if (hs.mode == HA_MODE_IMPORT2) {
  648. if (strcmp(s,"as") == 0) {
  649. style = SCE_HA_KEYWORD;
  650. new_mode = HA_MODE_IMPORT3;
  651. } else if (strcmp(s,"hiding") == 0) {
  652. style = SCE_HA_KEYWORD;
  653. }
  654. } else if (hs.mode == HA_MODE_TYPE) {
  655. if (strcmp(s,"family") == 0)
  656. style = SCE_HA_KEYWORD;
  657. }
  658. if (hs.mode == HA_MODE_FFI) {
  659. if (ffi.InList(s)) {
  660. style = SCE_HA_KEYWORD;
  661. new_mode = HA_MODE_FFI;
  662. }
  663. }
  664. sc.ChangeState(style);
  665. sc.SetState(SCE_HA_DEFAULT);
  666. if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
  667. new_mode = HA_MODE_IMPORT1;
  668. else if (strcmp(s,"module") == 0)
  669. new_mode = HA_MODE_MODULE;
  670. else if (strcmp(s,"foreign") == 0)
  671. new_mode = HA_MODE_FFI;
  672. else if (strcmp(s,"type") == 0
  673. || strcmp(s,"data") == 0)
  674. new_mode = HA_MODE_TYPE;
  675. hs.mode = new_mode;
  676. }
  677. // Comments
  678. // Oneliner
  679. else if (sc.state == SCE_HA_COMMENTLINE) {
  680. if (sc.atLineEnd) {
  681. sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
  682. sc.Forward(); // prevent double counting a line
  683. } else if (inDashes && sc.ch != '-' && !hs.pragma) {
  684. inDashes = false;
  685. if (IsAnHaskellOperatorChar(sc.ch)) {
  686. alreadyInTheMiddleOfOperator = true;
  687. sc.ChangeState(SCE_HA_OPERATOR);
  688. }
  689. } else {
  690. sc.Forward();
  691. }
  692. }
  693. // Nested
  694. else if (IsCommentBlockStyle(sc.state)) {
  695. if (sc.Match('{','-')) {
  696. sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
  697. sc.Forward(2);
  698. hs.nestLevel++;
  699. } else if (sc.Match('-','}')) {
  700. sc.Forward(2);
  701. assert(hs.nestLevel > 0);
  702. if (hs.nestLevel > 0)
  703. hs.nestLevel--;
  704. sc.SetState(
  705. hs.nestLevel == 0
  706. ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
  707. : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
  708. } else {
  709. sc.Forward();
  710. }
  711. }
  712. // Pragma
  713. else if (sc.state == SCE_HA_PRAGMA) {
  714. if (sc.Match("#-}")) {
  715. hs.pragma = false;
  716. sc.Forward(3);
  717. sc.SetState(SCE_HA_DEFAULT);
  718. } else if (sc.Match('-','-')) {
  719. sc.SetState(SCE_HA_COMMENTLINE);
  720. sc.Forward(2);
  721. inDashes = false;
  722. } else if (sc.Match('{','-')) {
  723. sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
  724. sc.Forward(2);
  725. hs.nestLevel = 1;
  726. } else {
  727. sc.Forward();
  728. }
  729. }
  730. // New state?
  731. else if (sc.state == SCE_HA_DEFAULT) {
  732. // Digit
  733. if (IsADigit(sc.ch)) {
  734. hs.mode = HA_MODE_DEFAULT;
  735. sc.SetState(SCE_HA_NUMBER);
  736. if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
  737. // Match anything starting with "0x" or "0X", too
  738. sc.Forward(2);
  739. base = 16;
  740. dot = false;
  741. } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
  742. // Match anything starting with "0o" or "0O", too
  743. sc.Forward(2);
  744. base = 8;
  745. dot = false;
  746. } else {
  747. sc.Forward();
  748. base = 10;
  749. dot = true;
  750. }
  751. }
  752. // Pragma
  753. else if (sc.Match("{-#")) {
  754. hs.pragma = true;
  755. sc.SetState(SCE_HA_PRAGMA);
  756. sc.Forward(3);
  757. }
  758. // Comment line
  759. else if (sc.Match('-','-')) {
  760. sc.SetState(SCE_HA_COMMENTLINE);
  761. sc.Forward(2);
  762. inDashes = true;
  763. }
  764. // Comment block
  765. else if (sc.Match('{','-')) {
  766. sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
  767. sc.Forward(2);
  768. hs.nestLevel = 1;
  769. }
  770. // String
  771. else if (sc.ch == '\"') {
  772. sc.SetState(SCE_HA_STRING);
  773. sc.Forward();
  774. }
  775. // Character or quoted name or promoted term
  776. else if (sc.ch == '\'') {
  777. hs.mode = HA_MODE_DEFAULT;
  778. sc.SetState(SCE_HA_CHARACTER);
  779. sc.Forward();
  780. if (options.allowQuotes) {
  781. // Quoted type ''T
  782. if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
  783. sc.Forward();
  784. sc.ChangeState(SCE_HA_IDENTIFIER);
  785. } else if (sc.chNext != '\'') {
  786. // Quoted name 'n or promoted constructor 'N
  787. if (IsAHaskellWordStart(sc.ch)) {
  788. sc.ChangeState(SCE_HA_IDENTIFIER);
  789. // Promoted constructor operator ':~>
  790. } else if (sc.ch == ':') {
  791. alreadyInTheMiddleOfOperator = false;
  792. sc.ChangeState(SCE_HA_OPERATOR);
  793. // Promoted list or tuple '[T]
  794. } else if (sc.ch == '[' || sc.ch== '(') {
  795. sc.ChangeState(SCE_HA_OPERATOR);
  796. sc.ForwardSetState(SCE_HA_DEFAULT);
  797. }
  798. }
  799. }
  800. }
  801. // Operator starting with '?' or an implicit parameter
  802. else if (sc.ch == '?') {
  803. hs.mode = HA_MODE_DEFAULT;
  804. alreadyInTheMiddleOfOperator = false;
  805. sc.SetState(SCE_HA_OPERATOR);
  806. if ( options.implicitParams
  807. && IsAHaskellWordStart(sc.chNext)
  808. && !IsHaskellUpperCase(sc.chNext)) {
  809. sc.Forward();
  810. sc.ChangeState(SCE_HA_IDENTIFIER);
  811. }
  812. }
  813. // Operator
  814. else if (IsAnHaskellOperatorChar(sc.ch)) {
  815. hs.mode = HA_MODE_DEFAULT;
  816. sc.SetState(SCE_HA_OPERATOR);
  817. }
  818. // Braces and punctuation
  819. else if (sc.ch == ',' || sc.ch == ';'
  820. || sc.ch == '(' || sc.ch == ')'
  821. || sc.ch == '[' || sc.ch == ']'
  822. || sc.ch == '{' || sc.ch == '}') {
  823. sc.SetState(SCE_HA_OPERATOR);
  824. sc.ForwardSetState(SCE_HA_DEFAULT);
  825. }
  826. // Keyword or Identifier
  827. else if (IsAHaskellWordStart(sc.ch)) {
  828. sc.SetState(SCE_HA_IDENTIFIER);
  829. // Something we don't care about
  830. } else {
  831. sc.Forward();
  832. }
  833. }
  834. // This branch should never be reached.
  835. else {
  836. assert(false);
  837. sc.Forward();
  838. }
  839. }
  840. sc.Complete();
  841. }
  842. void SCI_METHOD LexerHaskell::Fold(Sci_PositionU startPos, Sci_Position length, int // initStyle
  843. ,IDocument *pAccess) {
  844. if (!options.fold)
  845. return;
  846. Accessor styler(pAccess, NULL);
  847. Sci_Position lineCurrent = styler.GetLine(startPos);
  848. if (lineCurrent <= firstImportLine) {
  849. firstImportLine = -1; // readjust first import position
  850. firstImportIndent = 0;
  851. }
  852. const Sci_Position maxPos = startPos + length;
  853. const Sci_Position maxLines =
  854. maxPos == styler.Length()
  855. ? styler.GetLine(maxPos)
  856. : styler.GetLine(maxPos - 1); // Requested last line
  857. const Sci_Position docLines = styler.GetLine(styler.Length()); // Available last line
  858. // Backtrack to previous non-blank line so we can determine indent level
  859. // for any white space lines
  860. // and so we can fix any preceding fold level (which is why we go back
  861. // at least one line in all cases)
  862. bool importHere = LineContainsImport(lineCurrent, styler);
  863. int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
  864. while (lineCurrent > 0) {
  865. lineCurrent--;
  866. importHere = LineContainsImport(lineCurrent, styler);
  867. indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
  868. if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
  869. break;
  870. }
  871. int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
  872. if (importHere) {
  873. indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
  874. if (firstImportLine == -1) {
  875. firstImportLine = lineCurrent;
  876. firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
  877. }
  878. if (firstImportLine != lineCurrent) {
  879. indentCurrentLevel++;
  880. }
  881. }
  882. indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
  883. // Process all characters to end of requested range
  884. //that hangs over the end of the range. Cap processing in all cases
  885. // to end of document.
  886. while (lineCurrent <= docLines && lineCurrent <= maxLines) {
  887. // Gather info
  888. Sci_Position lineNext = lineCurrent + 1;
  889. importHere = false;
  890. int indentNext = indentCurrent;
  891. if (lineNext <= docLines) {
  892. // Information about next line is only available if not at end of document
  893. importHere = LineContainsImport(lineNext, styler);
  894. indentNext = IndentAmountWithOffset(styler, lineNext);
  895. }
  896. if (indentNext & SC_FOLDLEVELWHITEFLAG)
  897. indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
  898. // Skip past any blank lines for next indent level info; we skip also
  899. // comments (all comments, not just those starting in column 0)
  900. // which effectively folds them into surrounding code rather
  901. // than screwing up folding.
  902. while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
  903. lineNext++;
  904. importHere = LineContainsImport(lineNext, styler);
  905. indentNext = IndentAmountWithOffset(styler, lineNext);
  906. }
  907. int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
  908. if (importHere) {
  909. indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
  910. if (firstImportLine == -1) {
  911. firstImportLine = lineNext;
  912. firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
  913. }
  914. if (firstImportLine != lineNext) {
  915. indentNextLevel++;
  916. }
  917. }
  918. indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
  919. const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
  920. // Now set all the indent levels on the lines we skipped
  921. // Do this from end to start. Once we encounter one line
  922. // which is indented more than the line after the end of
  923. // the comment-block, use the level of the block before
  924. Sci_Position skipLine = lineNext;
  925. int skipLevel = indentNextLevel;
  926. while (--skipLine > lineCurrent) {
  927. int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
  928. if (options.foldCompact) {
  929. if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
  930. skipLevel = levelBeforeComments;
  931. }
  932. int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
  933. styler.SetLevel(skipLine, skipLevel | whiteFlag);
  934. } else {
  935. if ( (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
  936. && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
  937. skipLevel = levelBeforeComments;
  938. }
  939. styler.SetLevel(skipLine, skipLevel);
  940. }
  941. }
  942. int lev = indentCurrent;
  943. if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
  944. if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
  945. lev |= SC_FOLDLEVELHEADERFLAG;
  946. }
  947. // Set fold level for this line and move to next line
  948. styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
  949. indentCurrent = indentNext;
  950. indentCurrentLevel = indentNextLevel;
  951. lineCurrent = lineNext;
  952. }
  953. // NOTE: Cannot set level of last line here because indentCurrent doesn't have
  954. // header flag set; the loop above is crafted to take care of this case!
  955. //styler.SetLevel(lineCurrent, indentCurrent);
  956. }
  957. LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
  958. LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);