LexBibTex.cpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. // Copyright 2008-2010 Sergiu Dotenco. The License.txt file describes the
  2. // conditions under which this software may be distributed.
  3. /**
  4. * @file LexBibTeX.cxx
  5. * @brief General BibTeX coloring scheme.
  6. * @author Sergiu Dotenco
  7. * @date April 18, 2009
  8. */
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <cassert>
  12. #include <cctype>
  13. #include <string>
  14. #include <algorithm>
  15. #include <functional>
  16. #include "ILexer.h"
  17. #include "Scintilla.h"
  18. #include "SciLexer.h"
  19. #include "PropSetSimple.h"
  20. #include "WordList.h"
  21. #include "LexAccessor.h"
  22. #include "Accessor.h"
  23. #include "StyleContext.h"
  24. #include "CharacterSet.h"
  25. #include "LexerModule.h"
  26. #ifdef SCI_NAMESPACE
  27. using namespace Scintilla;
  28. #endif
  29. namespace {
  30. bool IsAlphabetic(unsigned int ch)
  31. {
  32. return IsASCII(ch) && std::isalpha(ch) != 0;
  33. }
  34. bool IsAlphaNumeric(char ch)
  35. {
  36. return IsASCII(ch) && std::isalnum(ch);
  37. }
  38. bool EqualCaseInsensitive(const char* a, const char* b)
  39. {
  40. return CompareCaseInsensitive(a, b) == 0;
  41. }
  42. bool EntryWithoutKey(const char* name)
  43. {
  44. return EqualCaseInsensitive(name,"string");
  45. }
  // Maps an opening delimiter to its closing counterpart: '(' -> ')' and
  // '{' -> '}'. Any other character is returned unchanged.
  char GetClosingBrace(char openbrace)
  {
      if (openbrace == '(')
          return ')';

      if (openbrace == '{')
          return '}';

      return openbrace;
  }
  // Returns true when ch is an '@' that is not escaped, i.e. the preceding
  // character prev is not a backslash.
  bool IsEntryStart(char prev, char ch)
  {
      if (ch != '@')
          return false;

      return prev != '\\';
  }
  59. bool IsEntryStart(const StyleContext& sc)
  60. {
  61. return IsEntryStart(sc.chPrev, sc.ch);
  62. }
  63. void ColorizeBibTeX(Sci_PositionU start_pos, Sci_Position length, int /*init_style*/, WordList* keywordlists[], Accessor& styler)
  64. {
  65. WordList &EntryNames = *keywordlists[0];
  66. bool fold_compact = styler.GetPropertyInt("fold.compact", 1) != 0;
  67. std::string buffer;
  68. buffer.reserve(25);
  69. // We always colorize a section from the beginning, so let's
  70. // search for the @ character which isn't escaped, i.e. \@
  71. while (start_pos > 0 && !IsEntryStart(styler.SafeGetCharAt(start_pos - 1),
  72. styler.SafeGetCharAt(start_pos))) {
  73. --start_pos; ++length;
  74. }
  75. styler.StartAt(start_pos);
  76. styler.StartSegment(start_pos);
  77. Sci_Position current_line = styler.GetLine(start_pos);
  78. int prev_level = styler.LevelAt(current_line) & SC_FOLDLEVELNUMBERMASK;
  79. int current_level = prev_level;
  80. int visible_chars = 0;
  81. bool in_comment = false ;
  82. StyleContext sc(start_pos, length, SCE_BIBTEX_DEFAULT, styler);
  83. bool going = sc.More(); // needed because of a fuzzy end of file state
  84. char closing_brace = 0;
  85. bool collect_entry_name = false;
  86. for (; going; sc.Forward()) {
  87. if (!sc.More())
  88. going = false; // we need to go one behind the end of text
  89. if (in_comment) {
  90. if (sc.atLineEnd) {
  91. sc.SetState(SCE_BIBTEX_DEFAULT);
  92. in_comment = false;
  93. }
  94. }
  95. else {
  96. // Found @entry
  97. if (IsEntryStart(sc)) {
  98. sc.SetState(SCE_BIBTEX_UNKNOWN_ENTRY);
  99. sc.Forward();
  100. ++current_level;
  101. buffer.clear();
  102. collect_entry_name = true;
  103. }
  104. else if ((sc.state == SCE_BIBTEX_ENTRY || sc.state == SCE_BIBTEX_UNKNOWN_ENTRY)
  105. && (sc.ch == '{' || sc.ch == '(')) {
  106. // Entry name colorization done
  107. // Found either a { or a ( after entry's name, e.g. @entry(...) @entry{...}
  108. // Closing counterpart needs to be stored.
  109. closing_brace = GetClosingBrace(sc.ch);
  110. sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize { (
  111. // @string doesn't have any key
  112. if (EntryWithoutKey(buffer.c_str()))
  113. sc.ForwardSetState(SCE_BIBTEX_PARAMETER);
  114. else
  115. sc.ForwardSetState(SCE_BIBTEX_KEY); // Key/label colorization
  116. }
  117. // Need to handle the case where entry's key is empty
  118. // e.g. @book{,...}
  119. if (sc.state == SCE_BIBTEX_KEY && sc.ch == ',') {
  120. // Key/label colorization done
  121. sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the ,
  122. sc.ForwardSetState(SCE_BIBTEX_PARAMETER); // Parameter colorization
  123. }
  124. else if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == '=') {
  125. sc.SetState(SCE_BIBTEX_DEFAULT); // Don't colorize the =
  126. sc.ForwardSetState(SCE_BIBTEX_VALUE); // Parameter value colorization
  127. Sci_Position start = sc.currentPos;
  128. // We need to handle multiple situations:
  129. // 1. name"one two {three}"
  130. // 2. name={one {one two {two}} three}
  131. // 3. year=2005
  132. // Skip ", { until we encounter the first alphanumerical character
  133. while (sc.More() && !(IsAlphaNumeric(sc.ch) || sc.ch == '"' || sc.ch == '{'))
  134. sc.Forward();
  135. if (sc.More()) {
  136. // Store " or {
  137. char ch = sc.ch;
  138. // Not interested in alphanumerical characters
  139. if (IsAlphaNumeric(ch))
  140. ch = 0;
  141. int skipped = 0;
  142. if (ch) {
  143. // Skip preceding " or { such as in name={{test}}.
  144. // Remember how many characters have been skipped
  145. // Make sure that empty values, i.e. "" are also handled correctly
  146. while (sc.More() && (sc.ch == ch && (ch != '"' || skipped < 1))) {
  147. sc.Forward();
  148. ++skipped;
  149. }
  150. }
  151. // Closing counterpart for " is the same character
  152. if (ch == '{')
  153. ch = '}';
  154. // We have reached the parameter value
  155. // In case the open character was a alnum char, skip until , is found
  156. // otherwise until skipped == 0
  157. while (sc.More() && (skipped > 0 || (!ch && !(sc.ch == ',' || sc.ch == closing_brace)))) {
  158. // Make sure the character isn't escaped
  159. if (sc.chPrev != '\\') {
  160. // Parameter value contains a { which is the 2nd case described above
  161. if (sc.ch == '{')
  162. ++skipped; // Remember it
  163. else if (sc.ch == '}')
  164. --skipped;
  165. else if (skipped == 1 && sc.ch == ch && ch == '"') // Don't ignore cases like {"o}
  166. skipped = 0;
  167. }
  168. sc.Forward();
  169. }
  170. }
  171. // Don't colorize the ,
  172. sc.SetState(SCE_BIBTEX_DEFAULT);
  173. // Skip until the , or entry's closing closing_brace is found
  174. // since this parameter might be the last one
  175. while (sc.More() && !(sc.ch == ',' || sc.ch == closing_brace))
  176. sc.Forward();
  177. int state = SCE_BIBTEX_PARAMETER; // The might be more parameters
  178. // We've reached the closing closing_brace for the bib entry
  179. // in case no " or {} has been used to enclose the value,
  180. // as in 3rd case described above
  181. if (sc.ch == closing_brace) {
  182. --current_level;
  183. // Make sure the text between entries is not colored
  184. // using parameter's style
  185. state = SCE_BIBTEX_DEFAULT;
  186. }
  187. Sci_Position end = sc.currentPos;
  188. current_line = styler.GetLine(end);
  189. // We have possibly skipped some lines, so the folding levels
  190. // have to be adjusted separately
  191. for (Sci_Position i = styler.GetLine(start); i <= styler.GetLine(end); ++i)
  192. styler.SetLevel(i, prev_level);
  193. sc.ForwardSetState(state);
  194. }
  195. if (sc.state == SCE_BIBTEX_PARAMETER && sc.ch == closing_brace) {
  196. sc.SetState(SCE_BIBTEX_DEFAULT);
  197. --current_level;
  198. }
  199. // Non escaped % found which represents a comment until the end of the line
  200. if (sc.chPrev != '\\' && sc.ch == '%') {
  201. in_comment = true;
  202. sc.SetState(SCE_BIBTEX_COMMENT);
  203. }
  204. }
  205. if (sc.state == SCE_BIBTEX_UNKNOWN_ENTRY || sc.state == SCE_BIBTEX_ENTRY) {
  206. if (!IsAlphabetic(sc.ch) && collect_entry_name)
  207. collect_entry_name = false;
  208. if (collect_entry_name) {
  209. buffer += static_cast<char>(tolower(sc.ch));
  210. if (EntryNames.InList(buffer.c_str()))
  211. sc.ChangeState(SCE_BIBTEX_ENTRY);
  212. else
  213. sc.ChangeState(SCE_BIBTEX_UNKNOWN_ENTRY);
  214. }
  215. }
  216. if (sc.atLineEnd) {
  217. int level = prev_level;
  218. if (visible_chars == 0 && fold_compact)
  219. level |= SC_FOLDLEVELWHITEFLAG;
  220. if ((current_level > prev_level))
  221. level |= SC_FOLDLEVELHEADERFLAG;
  222. // else if (current_level < prev_level)
  223. // level |= SC_FOLDLEVELBOXFOOTERFLAG; // Deprecated
  224. if (level != styler.LevelAt(current_line)) {
  225. styler.SetLevel(current_line, level);
  226. }
  227. ++current_line;
  228. prev_level = current_level;
  229. visible_chars = 0;
  230. }
  231. if (!isspacechar(sc.ch))
  232. ++visible_chars;
  233. }
  234. sc.Complete();
  235. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  236. int flagsNext = styler.LevelAt(current_line) & ~SC_FOLDLEVELNUMBERMASK;
  237. styler.SetLevel(current_line, prev_level | flagsNext);
  238. }
  239. }
  // Descriptions of the keyword lists handed to the lexer module below.
  // The array is terminated by a null entry.
  static const char* const BibTeXWordLists[] = {
      "Entry Names", // names of the entry types to be recognised
      0
  };
  244. LexerModule lmBibTeX(SCLEX_BIBTEX, ColorizeBibTeX, "bib", 0, BibTeXWordLists);
  245. // Entry Names
  246. // article, book, booklet, conference, inbook,
  247. // incollection, inproceedings, manual, mastersthesis,
  248. // misc, phdthesis, proceedings, techreport, unpublished,
  249. // string, url