123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549 |
- // Scintilla source code edit control
- /** @file LexOScript.cxx
- ** Lexer for OScript sources; ocx files and/or OSpace dumps.
- ** OScript is a programming language used to develop applications for the
- ** Livelink server platform.
- **/
- // Written by Ferdinand Prantl <prantlf@gmail.com>, inspired by the code from
- // LexVB.cxx and LexPascal.cxx. The License.txt file describes the conditions
- // under which this software may be distributed.
- #include <stdlib.h>
- #include <string.h>
- #include <stdio.h>
- #include <stdarg.h>
- #include <assert.h>
- #include <ctype.h>
- #include "ILexer.h"
- #include "Scintilla.h"
- #include "SciLexer.h"
- #include "WordList.h"
- #include "LexAccessor.h"
- #include "Accessor.h"
- #include "StyleContext.h"
- #include "CharacterSet.h"
- #include "LexerModule.h"
- #ifdef SCI_NAMESPACE
- using namespace Scintilla;
- #endif
- // -----------------------------------------
- // Functions classifying a single character.
- // This function is generic and should be probably moved to CharSet.h where
- // IsAlphaNumeric the others reside.
- inline bool IsAlpha(int ch) {
- return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
- }
- static inline bool IsIdentifierChar(int ch) {
- // Identifiers cannot contain non-ASCII letters; a word with non-English
- // language-specific characters cannot be an identifier.
- return IsAlphaNumeric(ch) || ch == '_';
- }
- static inline bool IsIdentifierStart(int ch) {
- // Identifiers cannot contain non-ASCII letters; a word with non-English
- // language-specific characters cannot be an identifier.
- return IsAlpha(ch) || ch == '_';
- }
- static inline bool IsNumberChar(int ch, int chNext) {
- // Numeric constructs are not checked for lexical correctness. They are
- // expected to look like +1.23-E9 but actually any bunch of the following
- // characters will be styled as number.
- // KNOWN PROBLEM: if you put + or - operators immediately after a number
- // and the next operand starts with the letter E, the operator will not be
- // recognized and it will be styled together with the preceding number.
- // This should not occur; at least not often. The coding style recommends
- // putting spaces around operators.
- return IsADigit(ch) || toupper(ch) == 'E' || ch == '.' ||
- ((ch == '-' || ch == '+') && toupper(chNext) == 'E');
- }
- // This function checks for the start or a natural number without any symbols
- // or operators as a prefix; the IsPrefixedNumberStart should be called
- // immediately after this one to cover all possible numeric constructs.
- static inline bool IsNaturalNumberStart(int ch) {
- return IsADigit(ch) != 0;
- }
- static inline bool IsPrefixedNumberStart(int ch, int chNext) {
- // KNOWN PROBLEM: if you put + or - operators immediately before a number
- // the operator will not be recognized and it will be styled together with
- // the succeeding number. This should not occur; at least not often. The
- // coding style recommends putting spaces around operators.
- return (ch == '.' || ch == '-' || ch == '+') && IsADigit(chNext);
- }
- static inline bool IsOperator(int ch) {
- return strchr("%^&*()-+={}[]:;<>,/?!.~|\\", ch) != NULL;
- }
- // ---------------------------------------------------------------
- // Functions classifying a token currently processed in the lexer.
- // Checks if the current line starts with the preprocessor directive used
- // usually to introduce documentation comments: #ifdef DOC. This method is
- // supposed to be called if the line has been recognized as a preprocessor
- // directive already.
- static bool IsDocCommentStart(StyleContext &sc) {
- // Check the line back to its start only if the end looks promising.
- if (sc.LengthCurrent() == 10 && !IsAlphaNumeric(sc.ch)) {
- char s[11];
- sc.GetCurrentLowered(s, sizeof(s));
- return strcmp(s, "#ifdef doc") == 0;
- }
- return false;
- }
- // Checks if the current line starts with the preprocessor directive that
- // is complementary to the #ifdef DOC start: #endif. This method is supposed
- // to be called if the current state point to the documentation comment.
- // QUESTIONAL ASSUMPTION: The complete #endif directive is not checked; just
- // the starting #e. However, there is no other preprocessor directive with
- // the same starting letter and thus this optimization should always work.
- static bool IsDocCommentEnd(StyleContext &sc) {
- return sc.ch == '#' && sc.chNext == 'e';
- }
- class IdentifierClassifier {
- WordList &keywords; // Passed from keywords property.
- WordList &constants; // Passed from keywords2 property.
- WordList &operators; // Passed from keywords3 property.
- WordList &types; // Passed from keywords4 property.
- WordList &functions; // Passed from keywords5 property.
- WordList &objects; // Passed from keywords6 property.
- IdentifierClassifier(IdentifierClassifier const&);
- IdentifierClassifier& operator=(IdentifierClassifier const&);
- public:
- IdentifierClassifier(WordList *keywordlists[]) :
- keywords(*keywordlists[0]), constants(*keywordlists[1]),
- operators(*keywordlists[2]), types(*keywordlists[3]),
- functions(*keywordlists[4]), objects(*keywordlists[5])
- {}
- void ClassifyIdentifier(StyleContext &sc) {
- // Opening parenthesis following an identifier makes it a possible
- // function call.
- // KNOWN PROBLEM: If some whitespace is inserted between the
- // identifier and the parenthesis they will not be able to be
- // recognized as a function call. This should not occur; at
- // least not often. Such coding style would be weird.
- if (sc.Match('(')) {
- char s[100];
- sc.GetCurrentLowered(s, sizeof(s));
- // Before an opening brace can be control statements and
- // operators too; function call is the last option.
- if (keywords.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_KEYWORD);
- } else if (operators.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_OPERATOR);
- } else if (functions.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_FUNCTION);
- } else {
- sc.ChangeState(SCE_OSCRIPT_METHOD);
- }
- sc.SetState(SCE_OSCRIPT_OPERATOR);
- } else {
- char s[100];
- sc.GetCurrentLowered(s, sizeof(s));
- // A dot following an identifier means an access to an object
- // member. The related object identifier can be special.
- // KNOWN PROBLEM: If there is whitespace between the identifier
- // and the following dot, the identifier will not be recognized
- // as an object in an object member access. If it is one of the
- // listed static objects it will not be styled.
- if (sc.Match('.') && objects.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_OBJECT);
- sc.SetState(SCE_OSCRIPT_OPERATOR);
- } else {
- if (keywords.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_KEYWORD);
- } else if (constants.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_CONSTANT);
- } else if (operators.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_OPERATOR);
- } else if (types.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_TYPE);
- } else if (functions.InList(s)) {
- sc.ChangeState(SCE_OSCRIPT_FUNCTION);
- }
- sc.SetState(SCE_OSCRIPT_DEFAULT);
- }
- }
- }
- };
- // ------------------------------------------------
- // Function colourising an excerpt of OScript code.
- static void ColouriseOScriptDoc(Sci_PositionU startPos, Sci_Position length,
- int initStyle, WordList *keywordlists[],
- Accessor &styler) {
- // I wonder how whole-line styles ended by EOLN can escape the resetting
- // code in the loop below and overflow to the next line. Let us make sure
- // that a new line does not start with them carried from the previous one.
- // NOTE: An overflowing string is intentionally not checked; it reminds
- // the developer that the string must be ended on the same line.
- if (initStyle == SCE_OSCRIPT_LINE_COMMENT ||
- initStyle == SCE_OSCRIPT_PREPROCESSOR) {
- initStyle = SCE_OSCRIPT_DEFAULT;
- }
- styler.StartAt(startPos);
- StyleContext sc(startPos, length, initStyle, styler);
- IdentifierClassifier identifierClassifier(keywordlists);
- // It starts with true at the beginning of a line and changes to false as
- // soon as the first non-whitespace character has been processed.
- bool isFirstToken = true;
- // It starts with true at the beginning of a line and changes to false as
- // soon as the first identifier on the line is passed by.
- bool isFirstIdentifier = true;
- // It becomes false when #ifdef DOC (the preprocessor directive often
- // used to start a documentation comment) is encountered and remain false
- // until the end of the documentation block is not detected. This is done
- // by checking for the complementary #endif preprocessor directive.
- bool endDocComment = false;
- for (; sc.More(); sc.Forward()) {
- if (sc.atLineStart) {
- isFirstToken = true;
- isFirstIdentifier = true;
- // Detect the current state is neither whitespace nor identifier. It
- // means that no next identifier can be the first token on the line.
- } else if (isFirstIdentifier && sc.state != SCE_OSCRIPT_DEFAULT &&
- sc.state != SCE_OSCRIPT_IDENTIFIER) {
- isFirstIdentifier = false;
- }
- // Check if the current state should be changed.
- if (sc.state == SCE_OSCRIPT_OPERATOR) {
- // Multiple-symbol operators are marked by single characters.
- sc.SetState(SCE_OSCRIPT_DEFAULT);
- } else if (sc.state == SCE_OSCRIPT_IDENTIFIER) {
- if (!IsIdentifierChar(sc.ch)) {
- // Colon after an identifier makes it a label if it is the
- // first token on the line.
- // KNOWN PROBLEM: If some whitespace is inserted between the
- // identifier and the colon they will not be recognized as a
- // label. This should not occur; at least not often. It would
- // make the code structure less legible and examples in the
- // Livelink documentation do not show it.
- if (sc.Match(':') && isFirstIdentifier) {
- sc.ChangeState(SCE_OSCRIPT_LABEL);
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- } else {
- identifierClassifier.ClassifyIdentifier(sc);
- }
- // Avoid a sequence of two words be mistaken for a label. A
- // switch case would be an example.
- isFirstIdentifier = false;
- }
- } else if (sc.state == SCE_OSCRIPT_GLOBAL) {
- if (!IsIdentifierChar(sc.ch)) {
- sc.SetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_PROPERTY) {
- if (!IsIdentifierChar(sc.ch)) {
- // Any member access introduced by the dot operator is
- // initially marked as a property access. If an opening
- // parenthesis is detected later it is changed to method call.
- // KNOWN PROBLEM: The same as at the function call recognition
- // for SCE_OSCRIPT_IDENTIFIER above.
- if (sc.Match('(')) {
- sc.ChangeState(SCE_OSCRIPT_METHOD);
- }
- sc.SetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_NUMBER) {
- if (!IsNumberChar(sc.ch, sc.chNext)) {
- sc.SetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_SINGLEQUOTE_STRING) {
- if (sc.ch == '\'') {
- // Two consequential apostrophes convert to a single one.
- if (sc.chNext == '\'') {
- sc.Forward();
- } else {
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.atLineEnd) {
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_DOUBLEQUOTE_STRING) {
- if (sc.ch == '\"') {
- // Two consequential quotation marks convert to a single one.
- if (sc.chNext == '\"') {
- sc.Forward();
- } else {
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.atLineEnd) {
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_BLOCK_COMMENT) {
- if (sc.Match('*', '/')) {
- sc.Forward();
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_LINE_COMMENT) {
- if (sc.atLineEnd) {
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_PREPROCESSOR) {
- if (IsDocCommentStart(sc)) {
- sc.ChangeState(SCE_OSCRIPT_DOC_COMMENT);
- endDocComment = false;
- } else if (sc.atLineEnd) {
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- } else if (sc.state == SCE_OSCRIPT_DOC_COMMENT) {
- // KNOWN PROBLEM: The first line detected that would close a
- // conditional preprocessor block (#endif) the documentation
- // comment block will end. (Nested #if-#endif blocks are not
- // supported. Hopefully it will not occur often that a line
- // within the text block would stat with #endif.
- if (isFirstToken && IsDocCommentEnd(sc)) {
- endDocComment = true;
- } else if (sc.atLineEnd && endDocComment) {
- sc.ForwardSetState(SCE_OSCRIPT_DEFAULT);
- }
- }
- // Check what state starts with the current character.
- if (sc.state == SCE_OSCRIPT_DEFAULT) {
- if (sc.Match('\'')) {
- sc.SetState(SCE_OSCRIPT_SINGLEQUOTE_STRING);
- } else if (sc.Match('\"')) {
- sc.SetState(SCE_OSCRIPT_DOUBLEQUOTE_STRING);
- } else if (sc.Match('/', '/')) {
- sc.SetState(SCE_OSCRIPT_LINE_COMMENT);
- sc.Forward();
- } else if (sc.Match('/', '*')) {
- sc.SetState(SCE_OSCRIPT_BLOCK_COMMENT);
- sc.Forward();
- } else if (isFirstToken && sc.Match('#')) {
- sc.SetState(SCE_OSCRIPT_PREPROCESSOR);
- } else if (sc.Match('$')) {
- // Both process-global ($xxx) and thread-global ($$xxx)
- // variables are handled as one global.
- sc.SetState(SCE_OSCRIPT_GLOBAL);
- } else if (IsNaturalNumberStart(sc.ch)) {
- sc.SetState(SCE_OSCRIPT_NUMBER);
- } else if (IsPrefixedNumberStart(sc.ch, sc.chNext)) {
- sc.SetState(SCE_OSCRIPT_NUMBER);
- sc.Forward();
- } else if (sc.Match('.') && IsIdentifierStart(sc.chNext)) {
- // Every object member access is marked as a property access
- // initially. The decision between property and method is made
- // after parsing the identifier and looking what comes then.
- // KNOWN PROBLEM: If there is whitespace between the following
- // identifier and the dot, the dot will not be recognized
- // as a member accessing operator. In turn, the identifier
- // will not be recognizable as a property or a method too.
- sc.SetState(SCE_OSCRIPT_OPERATOR);
- sc.Forward();
- sc.SetState(SCE_OSCRIPT_PROPERTY);
- } else if (IsIdentifierStart(sc.ch)) {
- sc.SetState(SCE_OSCRIPT_IDENTIFIER);
- } else if (IsOperator(sc.ch)) {
- sc.SetState(SCE_OSCRIPT_OPERATOR);
- }
- }
- if (isFirstToken && !IsASpaceOrTab(sc.ch)) {
- isFirstToken = false;
- }
- }
- sc.Complete();
- }
- // ------------------------------------------
- // Functions supporting OScript code folding.
- static inline bool IsBlockComment(int style) {
- return style == SCE_OSCRIPT_BLOCK_COMMENT;
- }
- static bool IsLineComment(Sci_Position line, Accessor &styler) {
- Sci_Position pos = styler.LineStart(line);
- Sci_Position eolPos = styler.LineStart(line + 1) - 1;
- for (Sci_Position i = pos; i < eolPos; i++) {
- char ch = styler[i];
- char chNext = styler.SafeGetCharAt(i + 1);
- int style = styler.StyleAt(i);
- if (ch == '/' && chNext == '/' && style == SCE_OSCRIPT_LINE_COMMENT) {
- return true;
- } else if (!IsASpaceOrTab(ch)) {
- return false;
- }
- }
- return false;
- }
- static inline bool IsPreprocessor(int style) {
- return style == SCE_OSCRIPT_PREPROCESSOR ||
- style == SCE_OSCRIPT_DOC_COMMENT;
- }
- static void GetRangeLowered(Sci_PositionU start, Sci_PositionU end,
- Accessor &styler, char *s, Sci_PositionU len) {
- Sci_PositionU i = 0;
- while (i < end - start + 1 && i < len - 1) {
- s[i] = static_cast<char>(tolower(styler[start + i]));
- i++;
- }
- s[i] = '\0';
- }
- static void GetForwardWordLowered(Sci_PositionU start, Accessor &styler,
- char *s, Sci_PositionU len) {
- Sci_PositionU i = 0;
- while (i < len - 1 && IsAlpha(styler.SafeGetCharAt(start + i))) {
- s[i] = static_cast<char>(tolower(styler.SafeGetCharAt(start + i)));
- i++;
- }
- s[i] = '\0';
- }
- static void UpdatePreprocessorFoldLevel(int &levelCurrent,
- Sci_PositionU startPos, Accessor &styler) {
- char s[7]; // Size of the longest possible keyword + null.
- GetForwardWordLowered(startPos, styler, s, sizeof(s));
- if (strcmp(s, "ifdef") == 0 ||
- strcmp(s, "ifndef") == 0) {
- levelCurrent++;
- } else if (strcmp(s, "endif") == 0) {
- levelCurrent--;
- if (levelCurrent < SC_FOLDLEVELBASE) {
- levelCurrent = SC_FOLDLEVELBASE;
- }
- }
- }
- static void UpdateKeywordFoldLevel(int &levelCurrent, Sci_PositionU lastStart,
- Sci_PositionU currentPos, Accessor &styler) {
- char s[9];
- GetRangeLowered(lastStart, currentPos, styler, s, sizeof(s));
- if (strcmp(s, "if") == 0 || strcmp(s, "for") == 0 ||
- strcmp(s, "switch") == 0 || strcmp(s, "function") == 0 ||
- strcmp(s, "while") == 0 || strcmp(s, "repeat") == 0) {
- levelCurrent++;
- } else if (strcmp(s, "end") == 0 || strcmp(s, "until") == 0) {
- levelCurrent--;
- if (levelCurrent < SC_FOLDLEVELBASE) {
- levelCurrent = SC_FOLDLEVELBASE;
- }
- }
- }
- // ------------------------------
- // Function folding OScript code.
- static void FoldOScriptDoc(Sci_PositionU startPos, Sci_Position length, int initStyle,
- WordList *[], Accessor &styler) {
- bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
- bool foldPreprocessor = styler.GetPropertyInt("fold.preprocessor") != 0;
- bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
- Sci_Position endPos = startPos + length;
- int visibleChars = 0;
- Sci_Position lineCurrent = styler.GetLine(startPos);
- int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
- int levelCurrent = levelPrev;
- char chNext = styler[startPos];
- int styleNext = styler.StyleAt(startPos);
- int style = initStyle;
- int lastStart = 0;
- for (Sci_Position i = startPos; i < endPos; i++) {
- char ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
- int stylePrev = style;
- style = styleNext;
- styleNext = styler.StyleAt(i + 1);
- bool atLineEnd = (ch == '\r' && chNext != '\n') || (ch == '\n');
- if (foldComment && IsBlockComment(style)) {
- if (!IsBlockComment(stylePrev)) {
- levelCurrent++;
- } else if (!IsBlockComment(styleNext) && !atLineEnd) {
- // Comments do not end at end of line and the next character
- // may not be styled.
- levelCurrent--;
- }
- }
- if (foldComment && atLineEnd && IsLineComment(lineCurrent, styler)) {
- if (!IsLineComment(lineCurrent - 1, styler) &&
- IsLineComment(lineCurrent + 1, styler))
- levelCurrent++;
- else if (IsLineComment(lineCurrent - 1, styler) &&
- !IsLineComment(lineCurrent+1, styler))
- levelCurrent--;
- }
- if (foldPreprocessor) {
- if (ch == '#' && IsPreprocessor(style)) {
- UpdatePreprocessorFoldLevel(levelCurrent, i + 1, styler);
- }
- }
- if (stylePrev != SCE_OSCRIPT_KEYWORD && style == SCE_OSCRIPT_KEYWORD) {
- lastStart = i;
- }
- if (stylePrev == SCE_OSCRIPT_KEYWORD) {
- if(IsIdentifierChar(ch) && !IsIdentifierChar(chNext)) {
- UpdateKeywordFoldLevel(levelCurrent, lastStart, i, styler);
- }
- }
- if (!IsASpace(ch))
- visibleChars++;
- if (atLineEnd) {
- int level = levelPrev;
- if (visibleChars == 0 && foldCompact)
- level |= SC_FOLDLEVELWHITEFLAG;
- if ((levelCurrent > levelPrev) && (visibleChars > 0))
- level |= SC_FOLDLEVELHEADERFLAG;
- if (level != styler.LevelAt(lineCurrent)) {
- styler.SetLevel(lineCurrent, level);
- }
- lineCurrent++;
- levelPrev = levelCurrent;
- visibleChars = 0;
- }
- }
- // If we did not reach EOLN in the previous loop, store the line level and
- // whitespace information. The rest will be filled in later.
- int lev = levelPrev;
- if (visibleChars == 0 && foldCompact)
- lev |= SC_FOLDLEVELWHITEFLAG;
- styler.SetLevel(lineCurrent, lev);
- }
- // --------------------------------------------
- // Declaration of the OScript lexer descriptor.
- static const char * const oscriptWordListDesc[] = {
- "Keywords and reserved words",
- "Literal constants",
- "Literal operators",
- "Built-in value and reference types",
- "Built-in global functions",
- "Built-in static objects",
- 0
- };
- LexerModule lmOScript(SCLEX_OSCRIPT, ColouriseOScriptDoc, "oscript", FoldOScriptDoc, oscriptWordListDesc);
|