LexTeX.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. // Scintilla source code edit control
  2. // File: LexTeX.cxx - general context conformant tex coloring scheme
  3. // Author: Hans Hagen - PRAGMA ADE - Hasselt NL - www.pragma-ade.com
  4. // Version: September 28, 2003
  5. // Copyright: 1998-2003 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. // This lexer is derived from the one written for the texwork environment (1999++) which in
  8. // turn is inspired by texedit (1991++) which finds its roots in wdt (1986).
  9. // If you run into strange boundary cases, just tell me and I'll look into it.
  10. // TeX Folding code added by instanton (soft_share@126.com) with borrowed code from VisualTeX source by Alex Romanenko.
  11. // Version: June 22, 2007
  12. #include <stdlib.h>
  13. #include <string.h>
  14. #include <stdio.h>
  15. #include <stdarg.h>
  16. #include <assert.h>
  17. #include <ctype.h>
  18. #include "ILexer.h"
  19. #include "Scintilla.h"
  20. #include "SciLexer.h"
  21. #include "WordList.h"
  22. #include "LexAccessor.h"
  23. #include "Accessor.h"
  24. #include "StyleContext.h"
  25. #include "CharacterSet.h"
  26. #include "LexerModule.h"
  27. #ifdef SCI_NAMESPACE
  28. using namespace Scintilla;
  29. #endif
  30. // val SCE_TEX_DEFAULT = 0
  31. // val SCE_TEX_SPECIAL = 1
  32. // val SCE_TEX_GROUP = 2
  33. // val SCE_TEX_SYMBOL = 3
  34. // val SCE_TEX_COMMAND = 4
  35. // val SCE_TEX_TEXT = 5
  36. // Definitions in SciTEGlobal.properties:
  37. //
  38. // TeX Highlighting
  39. //
  40. // # Default
  41. // style.tex.0=fore:#7F7F00
  42. // # Special
  43. // style.tex.1=fore:#007F7F
  44. // # Group
  45. // style.tex.2=fore:#880000
  46. // # Symbol
  47. // style.tex.3=fore:#7F7F00
  48. // # Command
  49. // style.tex.4=fore:#008800
  50. // # Text
  51. // style.tex.5=fore:#000000
  52. // lexer.tex.interface.default=0
  53. // lexer.tex.comment.process=0
  54. // todo: lexer.tex.auto.if
  55. // Auxiliary functions:
  56. static inline bool endOfLine(Accessor &styler, Sci_PositionU i) {
  57. return
  58. (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')) ;
  59. }
  60. static inline bool isTeXzero(int ch) {
  61. return
  62. (ch == '%') ;
  63. }
  64. static inline bool isTeXone(int ch) {
  65. return
  66. (ch == '[') || (ch == ']') || (ch == '=') || (ch == '#') ||
  67. (ch == '(') || (ch == ')') || (ch == '<') || (ch == '>') ||
  68. (ch == '"') ;
  69. }
  70. static inline bool isTeXtwo(int ch) {
  71. return
  72. (ch == '{') || (ch == '}') || (ch == '$') ;
  73. }
  74. static inline bool isTeXthree(int ch) {
  75. return
  76. (ch == '~') || (ch == '^') || (ch == '_') || (ch == '&') ||
  77. (ch == '-') || (ch == '+') || (ch == '\"') || (ch == '`') ||
  78. (ch == '/') || (ch == '|') || (ch == '%') ;
  79. }
  80. static inline bool isTeXfour(int ch) {
  81. return
  82. (ch == '\\') ;
  83. }
  84. static inline bool isTeXfive(int ch) {
  85. return
  86. ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) ||
  87. (ch == '@') || (ch == '!') || (ch == '?') ;
  88. }
  89. static inline bool isTeXsix(int ch) {
  90. return
  91. (ch == ' ') ;
  92. }
  93. static inline bool isTeXseven(int ch) {
  94. return
  95. (ch == '^') ;
  96. }
  97. // Interface determination
  98. static int CheckTeXInterface(
  99. Sci_PositionU startPos,
  100. Sci_Position length,
  101. Accessor &styler,
  102. int defaultInterface) {
  103. char lineBuffer[1024] ;
  104. Sci_PositionU linePos = 0 ;
  105. // some day we can make something lexer.tex.mapping=(all,0)(nl,1)(en,2)...
  106. if (styler.SafeGetCharAt(0) == '%') {
  107. for (Sci_PositionU i = 0; i < startPos + length; i++) {
  108. lineBuffer[linePos++] = styler.SafeGetCharAt(i) ;
  109. if (endOfLine(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
  110. lineBuffer[linePos] = '\0';
  111. if (strstr(lineBuffer, "interface=all")) {
  112. return 0 ;
  113. } else if (strstr(lineBuffer, "interface=tex")) {
  114. return 1 ;
  115. } else if (strstr(lineBuffer, "interface=nl")) {
  116. return 2 ;
  117. } else if (strstr(lineBuffer, "interface=en")) {
  118. return 3 ;
  119. } else if (strstr(lineBuffer, "interface=de")) {
  120. return 4 ;
  121. } else if (strstr(lineBuffer, "interface=cz")) {
  122. return 5 ;
  123. } else if (strstr(lineBuffer, "interface=it")) {
  124. return 6 ;
  125. } else if (strstr(lineBuffer, "interface=ro")) {
  126. return 7 ;
  127. } else if (strstr(lineBuffer, "interface=latex")) {
  128. // we will move latex cum suis up to 91+ when more keyword lists are supported
  129. return 8 ;
  130. } else if (styler.SafeGetCharAt(1) == 'D' && strstr(lineBuffer, "%D \\module")) {
  131. // better would be to limit the search to just one line
  132. return 3 ;
  133. } else {
  134. return defaultInterface ;
  135. }
  136. }
  137. }
  138. }
  139. return defaultInterface ;
  140. }
  141. static void ColouriseTeXDoc(
  142. Sci_PositionU startPos,
  143. Sci_Position length,
  144. int,
  145. WordList *keywordlists[],
  146. Accessor &styler) {
  147. styler.StartAt(startPos) ;
  148. styler.StartSegment(startPos) ;
  149. bool processComment = styler.GetPropertyInt("lexer.tex.comment.process", 0) == 1 ;
  150. bool useKeywords = styler.GetPropertyInt("lexer.tex.use.keywords", 1) == 1 ;
  151. bool autoIf = styler.GetPropertyInt("lexer.tex.auto.if", 1) == 1 ;
  152. int defaultInterface = styler.GetPropertyInt("lexer.tex.interface.default", 1) ;
  153. char key[100] ;
  154. int k ;
  155. bool newifDone = false ;
  156. bool inComment = false ;
  157. int currentInterface = CheckTeXInterface(startPos,length,styler,defaultInterface) ;
  158. if (currentInterface == 0) {
  159. useKeywords = false ;
  160. currentInterface = 1 ;
  161. }
  162. WordList &keywords = *keywordlists[currentInterface-1] ;
  163. StyleContext sc(startPos, length, SCE_TEX_TEXT, styler);
  164. bool going = sc.More() ; // needed because of a fuzzy end of file state
  165. for (; going; sc.Forward()) {
  166. if (! sc.More()) { going = false ; } // we need to go one behind the end of text
  167. if (inComment) {
  168. if (sc.atLineEnd) {
  169. sc.SetState(SCE_TEX_TEXT) ;
  170. newifDone = false ;
  171. inComment = false ;
  172. }
  173. } else {
  174. if (! isTeXfive(sc.ch)) {
  175. if (sc.state == SCE_TEX_COMMAND) {
  176. if (sc.LengthCurrent() == 1) { // \<noncstoken>
  177. if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
  178. sc.Forward(2) ; // \^^ and \^^<token>
  179. }
  180. sc.ForwardSetState(SCE_TEX_TEXT) ;
  181. } else {
  182. sc.GetCurrent(key, sizeof(key)-1) ;
  183. k = static_cast<int>(strlen(key)) ;
  184. memmove(key,key+1,k) ; // shift left over escape token
  185. key[k] = '\0' ;
  186. k-- ;
  187. if (! keywords || ! useKeywords) {
  188. sc.SetState(SCE_TEX_COMMAND) ;
  189. newifDone = false ;
  190. } else if (k == 1) { //\<cstoken>
  191. sc.SetState(SCE_TEX_COMMAND) ;
  192. newifDone = false ;
  193. } else if (keywords.InList(key)) {
  194. sc.SetState(SCE_TEX_COMMAND) ;
  195. newifDone = autoIf && (strcmp(key,"newif") == 0) ;
  196. } else if (autoIf && ! newifDone && (key[0] == 'i') && (key[1] == 'f') && keywords.InList("if")) {
  197. sc.SetState(SCE_TEX_COMMAND) ;
  198. } else {
  199. sc.ChangeState(SCE_TEX_TEXT) ;
  200. sc.SetState(SCE_TEX_TEXT) ;
  201. newifDone = false ;
  202. }
  203. }
  204. }
  205. if (isTeXzero(sc.ch)) {
  206. sc.SetState(SCE_TEX_SYMBOL);
  207. if (!endOfLine(styler,sc.currentPos + 1))
  208. sc.ForwardSetState(SCE_TEX_DEFAULT) ;
  209. inComment = ! processComment ;
  210. newifDone = false ;
  211. } else if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
  212. sc.SetState(SCE_TEX_TEXT) ;
  213. sc.ForwardSetState(SCE_TEX_TEXT) ;
  214. } else if (isTeXone(sc.ch)) {
  215. sc.SetState(SCE_TEX_SPECIAL) ;
  216. newifDone = false ;
  217. } else if (isTeXtwo(sc.ch)) {
  218. sc.SetState(SCE_TEX_GROUP) ;
  219. newifDone = false ;
  220. } else if (isTeXthree(sc.ch)) {
  221. sc.SetState(SCE_TEX_SYMBOL) ;
  222. newifDone = false ;
  223. } else if (isTeXfour(sc.ch)) {
  224. sc.SetState(SCE_TEX_COMMAND) ;
  225. } else if (isTeXsix(sc.ch)) {
  226. sc.SetState(SCE_TEX_TEXT) ;
  227. } else if (sc.atLineEnd) {
  228. sc.SetState(SCE_TEX_TEXT) ;
  229. newifDone = false ;
  230. inComment = false ;
  231. } else {
  232. sc.SetState(SCE_TEX_TEXT) ;
  233. }
  234. } else if (sc.state != SCE_TEX_COMMAND) {
  235. sc.SetState(SCE_TEX_TEXT) ;
  236. }
  237. }
  238. }
  239. sc.ChangeState(SCE_TEX_TEXT) ;
  240. sc.Complete();
  241. }
  242. static inline bool isNumber(int ch) {
  243. return
  244. (ch == '0') || (ch == '1') || (ch == '2') ||
  245. (ch == '3') || (ch == '4') || (ch == '5') ||
  246. (ch == '6') || (ch == '7') || (ch == '8') || (ch == '9');
  247. }
  248. static inline bool isWordChar(int ch) {
  249. return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z'));
  250. }
  251. static int ParseTeXCommand(Sci_PositionU pos, Accessor &styler, char *command)
  252. {
  253. Sci_Position length=0;
  254. char ch=styler.SafeGetCharAt(pos+1);
  255. if(ch==',' || ch==':' || ch==';' || ch=='%'){
  256. command[0]=ch;
  257. command[1]=0;
  258. return 1;
  259. }
  260. // find end
  261. while(isWordChar(ch) && !isNumber(ch) && ch!='_' && ch!='.' && length<100){
  262. command[length]=ch;
  263. length++;
  264. ch=styler.SafeGetCharAt(pos+length+1);
  265. }
  266. command[length]='\0';
  267. if(!length) return 0;
  268. return length+1;
  269. }
  270. static int classifyFoldPointTeXPaired(const char* s) {
  271. int lev=0;
  272. if (!(isdigit(s[0]) || (s[0] == '.'))){
  273. if (strcmp(s, "begin")==0||strcmp(s,"FoldStart")==0||
  274. strcmp(s,"abstract")==0||strcmp(s,"unprotect")==0||
  275. strcmp(s,"title")==0||strncmp(s,"start",5)==0||strncmp(s,"Start",5)==0||
  276. strcmp(s,"documentclass")==0||strncmp(s,"if",2)==0
  277. )
  278. lev=1;
  279. if (strcmp(s, "end")==0||strcmp(s,"FoldStop")==0||
  280. strcmp(s,"maketitle")==0||strcmp(s,"protect")==0||
  281. strncmp(s,"stop",4)==0||strncmp(s,"Stop",4)==0||
  282. strcmp(s,"fi")==0
  283. )
  284. lev=-1;
  285. }
  286. return lev;
  287. }
  288. static int classifyFoldPointTeXUnpaired(const char* s) {
  289. int lev=0;
  290. if (!(isdigit(s[0]) || (s[0] == '.'))){
  291. if (strcmp(s,"part")==0||
  292. strcmp(s,"chapter")==0||
  293. strcmp(s,"section")==0||
  294. strcmp(s,"subsection")==0||
  295. strcmp(s,"subsubsection")==0||
  296. strcmp(s,"CJKfamily")==0||
  297. strcmp(s,"appendix")==0||
  298. strcmp(s,"Topic")==0||strcmp(s,"topic")==0||
  299. strcmp(s,"subject")==0||strcmp(s,"subsubject")==0||
  300. strcmp(s,"def")==0||strcmp(s,"gdef")==0||strcmp(s,"edef")==0||
  301. strcmp(s,"xdef")==0||strcmp(s,"framed")==0||
  302. strcmp(s,"frame")==0||
  303. strcmp(s,"foilhead")==0||strcmp(s,"overlays")==0||strcmp(s,"slide")==0
  304. ){
  305. lev=1;
  306. }
  307. }
  308. return lev;
  309. }
  310. static bool IsTeXCommentLine(Sci_Position line, Accessor &styler) {
  311. Sci_Position pos = styler.LineStart(line);
  312. Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
  313. Sci_Position startpos = pos;
  314. while (startpos<eol_pos){
  315. char ch = styler[startpos];
  316. if (ch!='%' && ch!=' ') return false;
  317. else if (ch=='%') return true;
  318. startpos++;
  319. }
  320. return false;
  321. }
  322. // FoldTeXDoc: borrowed from VisualTeX with modifications
  323. static void FoldTexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler)
  324. {
  325. bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  326. Sci_PositionU endPos = startPos+length;
  327. int visibleChars=0;
  328. Sci_Position lineCurrent=styler.GetLine(startPos);
  329. int levelPrev=styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  330. int levelCurrent=levelPrev;
  331. char chNext=styler[startPos];
  332. char buffer[100]="";
  333. for (Sci_PositionU i=startPos; i < endPos; i++) {
  334. char ch=chNext;
  335. chNext=styler.SafeGetCharAt(i+1);
  336. bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
  337. if(ch=='\\') {
  338. ParseTeXCommand(i, styler, buffer);
  339. levelCurrent += classifyFoldPointTeXPaired(buffer)+classifyFoldPointTeXUnpaired(buffer);
  340. }
  341. if (levelCurrent > SC_FOLDLEVELBASE && ((ch == '\r' || ch=='\n') && (chNext == '\\'))) {
  342. ParseTeXCommand(i+1, styler, buffer);
  343. levelCurrent -= classifyFoldPointTeXUnpaired(buffer);
  344. }
  345. char chNext2;
  346. char chNext3;
  347. char chNext4;
  348. char chNext5;
  349. chNext2=styler.SafeGetCharAt(i+2);
  350. chNext3=styler.SafeGetCharAt(i+3);
  351. chNext4=styler.SafeGetCharAt(i+4);
  352. chNext5=styler.SafeGetCharAt(i+5);
  353. bool atEOfold = (ch == '%') &&
  354. (chNext == '%') && (chNext2=='}') &&
  355. (chNext3=='}')&& (chNext4=='-')&& (chNext5=='-');
  356. bool atBOfold = (ch == '%') &&
  357. (chNext == '%') && (chNext2=='-') &&
  358. (chNext3=='-')&& (chNext4=='{')&& (chNext5=='{');
  359. if(atBOfold){
  360. levelCurrent+=1;
  361. }
  362. if(atEOfold){
  363. levelCurrent-=1;
  364. }
  365. if(ch=='\\' && chNext=='['){
  366. levelCurrent+=1;
  367. }
  368. if(ch=='\\' && chNext==']'){
  369. levelCurrent-=1;
  370. }
  371. bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
  372. if (foldComment && atEOL && IsTeXCommentLine(lineCurrent, styler))
  373. {
  374. if (lineCurrent==0 && IsTeXCommentLine(lineCurrent + 1, styler)
  375. )
  376. levelCurrent++;
  377. else if (lineCurrent!=0 && !IsTeXCommentLine(lineCurrent - 1, styler)
  378. && IsTeXCommentLine(lineCurrent + 1, styler)
  379. )
  380. levelCurrent++;
  381. else if (lineCurrent!=0 && IsTeXCommentLine(lineCurrent - 1, styler) &&
  382. !IsTeXCommentLine(lineCurrent+1, styler))
  383. levelCurrent--;
  384. }
  385. //---------------------------------------------------------------------------------------------
  386. if (atEOL) {
  387. int lev = levelPrev;
  388. if (visibleChars == 0 && foldCompact)
  389. lev |= SC_FOLDLEVELWHITEFLAG;
  390. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  391. lev |= SC_FOLDLEVELHEADERFLAG;
  392. if (lev != styler.LevelAt(lineCurrent)) {
  393. styler.SetLevel(lineCurrent, lev);
  394. }
  395. lineCurrent++;
  396. levelPrev = levelCurrent;
  397. visibleChars = 0;
  398. }
  399. if (!isspacechar(ch))
  400. visibleChars++;
  401. }
  402. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  403. int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
  404. styler.SetLevel(lineCurrent, levelPrev | flagsNext);
  405. }
  406. static const char * const texWordListDesc[] = {
  407. "TeX, eTeX, pdfTeX, Omega",
  408. "ConTeXt Dutch",
  409. "ConTeXt English",
  410. "ConTeXt German",
  411. "ConTeXt Czech",
  412. "ConTeXt Italian",
  413. "ConTeXt Romanian",
  414. 0,
  415. } ;
  416. LexerModule lmTeX(SCLEX_TEX, ColouriseTeXDoc, "tex", FoldTexDoc, texWordListDesc);