123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500 |
// Scintilla source code edit control
/**
 * @file LexJSON.cxx
 * @date February 19, 2016
 * @brief Lexer for JSON and JSON-LD formats
 * @author nkmathew
 *
 * The License.txt file describes the conditions under which this software may
 * be distributed.
 *
 */
- #include <cstdlib>
- #include <cassert>
- #include <cctype>
- #include <cstdio>
- #include <string>
- #include <vector>
- #include <map>
- #include "ILexer.h"
- #include "Scintilla.h"
- #include "SciLexer.h"
- #include "WordList.h"
- #include "LexAccessor.h"
- #include "StyleContext.h"
- #include "CharacterSet.h"
- #include "LexerModule.h"
- #include "OptionSet.h"
- #ifdef SCI_NAMESPACE
- using namespace Scintilla;
- #endif
// Descriptions of the keyword lists understood by this lexer. Entry 0 names
// the list stored by WordListSet(0, ...) and entry 1 the one stored by
// WordListSet(1, ...); a null entry terminates the array.
static const char *const JSONWordListDesc[] = {
    "JSON Keywords", "JSON-LD Keywords", 0
};
- /**
- * Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the
- * colon separating the prefix and suffix
- *
- * https://www.w3.org/TR/json-ld/#dfn-compact-iri
- */
- struct CompactIRI {
- int colonCount;
- bool foundInvalidChar;
- CharacterSet setCompactIRI;
- CompactIRI() {
- colonCount = 0;
- foundInvalidChar = false;
- setCompactIRI = CharacterSet(CharacterSet::setAlpha, "$_-");
- }
- void resetState() {
- colonCount = 0;
- foundInvalidChar = false;
- }
- void checkChar(int ch) {
- if (ch == ':') {
- colonCount++;
- } else {
- foundInvalidChar |= !setCompactIRI.Contains(ch);
- }
- }
- bool shouldHighlight() const {
- return !foundInvalidChar && colonCount == 1;
- }
- };
- /**
- * Keeps track of escaped characters in strings as per:
- *
- * https://tools.ietf.org/html/rfc7159#section-7
- */
- struct EscapeSequence {
- int digitsLeft;
- CharacterSet setHexDigits;
- CharacterSet setEscapeChars;
- EscapeSequence() {
- digitsLeft = 0;
- setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
- setEscapeChars = CharacterSet(CharacterSet::setNone, "\\\"tnbfru/");
- }
- // Returns true if the following character is a valid escaped character
- bool newSequence(int nextChar) {
- digitsLeft = 0;
- if (nextChar == 'u') {
- digitsLeft = 5;
- } else if (!setEscapeChars.Contains(nextChar)) {
- return false;
- }
- return true;
- }
- bool atEscapeEnd() const {
- return digitsLeft <= 0;
- }
- bool isInvalidChar(int currChar) const {
- return !setHexDigits.Contains(currChar);
- }
- };
// User-configurable switches of the JSON lexer; every switch starts disabled.
struct OptionsJSON {
    bool foldCompact;     // bound to the "fold.compact" property
    bool fold;            // bound to the "fold" property
    bool allowComments;   // bound to "lexer.json.allow.comments"
    bool escapeSequence;  // bound to "lexer.json.escape.sequence"

    OptionsJSON() :
        foldCompact(false),
        fold(false),
        allowComments(false),
        escapeSequence(false) {
    }
};
- struct OptionSetJSON : public OptionSet<OptionsJSON> {
- OptionSetJSON() {
- DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence,
- "Set to 1 to enable highlighting of escape sequences in strings");
- DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments,
- "Set to 1 to enable highlighting of line/block comments in JSON");
- DefineProperty("fold.compact", &OptionsJSON::foldCompact);
- DefineProperty("fold", &OptionsJSON::fold);
- DefineWordListSets(JSONWordListDesc);
- }
- };
- class LexerJSON : public ILexer {
- OptionsJSON options;
- OptionSetJSON optSetJSON;
- EscapeSequence escapeSeq;
- WordList keywordsJSON;
- WordList keywordsJSONLD;
- CharacterSet setOperators;
- CharacterSet setURL;
- CharacterSet setKeywordJSONLD;
- CharacterSet setKeywordJSON;
- CompactIRI compactIRI;
- static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
- Sci_Position i = 0;
- while (i < 50) {
- i++;
- char curr = styler.SafeGetCharAt(start+i, '\0');
- char next = styler.SafeGetCharAt(start+i+1, '\0');
- bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
- if (curr == ch) {
- return true;
- } else if (!isspacechar(curr) || atEOL) {
- return false;
- }
- }
- return false;
- }
- /**
- * Looks for the colon following the end quote
- *
- * Assumes property names of lengths no longer than a 100 characters.
- * The colon is also expected to be less than 50 spaces after the end
- * quote for the string to be considered a property name
- */
- static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
- Sci_Position i = 0;
- bool escaped = false;
- while (i < 100) {
- i++;
- char curr = styler.SafeGetCharAt(start+i, '\0');
- if (escaped) {
- escaped = false;
- continue;
- }
- escaped = curr == '\\';
- if (curr == '"') {
- return IsNextNonWhitespace(styler, start+i, ':');
- } else if (!curr) {
- return false;
- }
- }
- return false;
- }
- static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet,
- StyleContext &context, LexAccessor &styler) {
- char word[51];
- Sci_Position currPos = (Sci_Position) context.currentPos;
- int i = 0;
- while (i < 50) {
- char ch = styler.SafeGetCharAt(currPos + i);
- if (!wordSet.Contains(ch)) {
- break;
- }
- word[i] = ch;
- i++;
- }
- word[i] = '\0';
- return keywordList.InList(word);
- }
- public:
- LexerJSON() :
- setOperators(CharacterSet::setNone, "[{}]:,"),
- setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="),
- setKeywordJSONLD(CharacterSet::setAlpha, ":@"),
- setKeywordJSON(CharacterSet::setAlpha, "$_") {
- }
- virtual ~LexerJSON() {}
- virtual int SCI_METHOD Version() const {
- return lvOriginal;
- }
- virtual void SCI_METHOD Release() {
- delete this;
- }
- virtual const char *SCI_METHOD PropertyNames() {
- return optSetJSON.PropertyNames();
- }
- virtual int SCI_METHOD PropertyType(const char *name) {
- return optSetJSON.PropertyType(name);
- }
- virtual const char *SCI_METHOD DescribeProperty(const char *name) {
- return optSetJSON.DescribeProperty(name);
- }
- virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) {
- if (optSetJSON.PropertySet(&options, key, val)) {
- return 0;
- }
- return -1;
- }
- virtual Sci_Position SCI_METHOD WordListSet(int n, const char *wl) {
- WordList *wordListN = 0;
- switch (n) {
- case 0:
- wordListN = &keywordsJSON;
- break;
- case 1:
- wordListN = &keywordsJSONLD;
- break;
- }
- Sci_Position firstModification = -1;
- if (wordListN) {
- WordList wlNew;
- wlNew.Set(wl);
- if (*wordListN != wlNew) {
- wordListN->Set(wl);
- firstModification = 0;
- }
- }
- return firstModification;
- }
- virtual void *SCI_METHOD PrivateCall(int, void *) {
- return 0;
- }
- static ILexer *LexerFactoryJSON() {
- return new LexerJSON;
- }
- virtual const char *SCI_METHOD DescribeWordListSets() {
- return optSetJSON.DescribeWordListSets();
- }
- virtual void SCI_METHOD Lex(Sci_PositionU startPos,
- Sci_Position length,
- int initStyle,
- IDocument *pAccess);
- virtual void SCI_METHOD Fold(Sci_PositionU startPos,
- Sci_Position length,
- int initStyle,
- IDocument *pAccess);
- };
- void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
- Sci_Position length,
- int initStyle,
- IDocument *pAccess) {
- LexAccessor styler(pAccess);
- StyleContext context(startPos, length, initStyle, styler);
- int stringStyleBefore = SCE_JSON_STRING;
- while (context.More()) {
- switch (context.state) {
- case SCE_JSON_BLOCKCOMMENT:
- if (context.Match("*/")) {
- context.Forward();
- context.ForwardSetState(SCE_JSON_DEFAULT);
- }
- break;
- case SCE_JSON_LINECOMMENT:
- if (context.atLineEnd) {
- context.SetState(SCE_JSON_DEFAULT);
- }
- break;
- case SCE_JSON_STRINGEOL:
- if (context.atLineStart) {
- context.SetState(SCE_JSON_DEFAULT);
- }
- break;
- case SCE_JSON_ESCAPESEQUENCE:
- escapeSeq.digitsLeft--;
- if (!escapeSeq.atEscapeEnd()) {
- if (escapeSeq.isInvalidChar(context.ch)) {
- context.SetState(SCE_JSON_ERROR);
- }
- break;
- }
- if (context.ch == '"') {
- context.SetState(stringStyleBefore);
- context.ForwardSetState(SCE_C_DEFAULT);
- } else if (context.ch == '\\') {
- if (!escapeSeq.newSequence(context.chNext)) {
- context.SetState(SCE_JSON_ERROR);
- }
- context.Forward();
- } else {
- context.SetState(stringStyleBefore);
- if (context.atLineEnd) {
- context.ChangeState(SCE_JSON_STRINGEOL);
- }
- }
- break;
- case SCE_JSON_PROPERTYNAME:
- case SCE_JSON_STRING:
- if (context.ch == '"') {
- if (compactIRI.shouldHighlight()) {
- context.ChangeState(SCE_JSON_COMPACTIRI);
- context.ForwardSetState(SCE_JSON_DEFAULT);
- compactIRI.resetState();
- } else {
- context.ForwardSetState(SCE_JSON_DEFAULT);
- }
- } else if (context.atLineEnd) {
- context.ChangeState(SCE_JSON_STRINGEOL);
- } else if (context.ch == '\\') {
- stringStyleBefore = context.state;
- if (options.escapeSequence) {
- context.SetState(SCE_JSON_ESCAPESEQUENCE);
- if (!escapeSeq.newSequence(context.chNext)) {
- context.SetState(SCE_JSON_ERROR);
- }
- }
- context.Forward();
- } else if (context.Match("https://") ||
- context.Match("http://") ||
- context.Match("ssh://") ||
- context.Match("git://") ||
- context.Match("svn://") ||
- context.Match("ftp://") ||
- context.Match("mailto:")) {
- // Handle most common URI schemes only
- stringStyleBefore = context.state;
- context.SetState(SCE_JSON_URI);
- } else if (context.ch == '@') {
- // https://www.w3.org/TR/json-ld/#dfn-keyword
- if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) {
- stringStyleBefore = context.state;
- context.SetState(SCE_JSON_LDKEYWORD);
- }
- } else {
- compactIRI.checkChar(context.ch);
- }
- break;
- case SCE_JSON_LDKEYWORD:
- case SCE_JSON_URI:
- if ((!setKeywordJSONLD.Contains(context.ch) &&
- (context.state == SCE_JSON_LDKEYWORD)) ||
- (!setURL.Contains(context.ch))) {
- context.SetState(stringStyleBefore);
- }
- if (context.ch == '"') {
- context.ForwardSetState(SCE_JSON_DEFAULT);
- } else if (context.atLineEnd) {
- context.ChangeState(SCE_JSON_STRINGEOL);
- }
- break;
- case SCE_JSON_OPERATOR:
- case SCE_JSON_NUMBER:
- context.SetState(SCE_JSON_DEFAULT);
- break;
- case SCE_JSON_ERROR:
- if (context.atLineEnd) {
- context.SetState(SCE_JSON_DEFAULT);
- }
- break;
- case SCE_JSON_KEYWORD:
- if (!setKeywordJSON.Contains(context.ch)) {
- context.SetState(SCE_JSON_DEFAULT);
- }
- break;
- }
- if (context.state == SCE_JSON_DEFAULT) {
- if (context.ch == '"') {
- compactIRI.resetState();
- context.SetState(SCE_JSON_STRING);
- Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
- if (AtPropertyName(styler, currPos)) {
- context.SetState(SCE_JSON_PROPERTYNAME);
- }
- } else if (setOperators.Contains(context.ch)) {
- context.SetState(SCE_JSON_OPERATOR);
- } else if (options.allowComments && context.Match("/*")) {
- context.SetState(SCE_JSON_BLOCKCOMMENT);
- context.Forward();
- } else if (options.allowComments && context.Match("//")) {
- context.SetState(SCE_JSON_LINECOMMENT);
- } else if (setKeywordJSON.Contains(context.ch)) {
- if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) {
- context.SetState(SCE_JSON_KEYWORD);
- }
- }
- bool numberStart =
- IsADigit(context.ch) && (context.chPrev == '+'||
- context.chPrev == '-' ||
- context.atLineStart ||
- IsASpace(context.chPrev) ||
- setOperators.Contains(context.chPrev));
- bool exponentPart =
- tolower(context.ch) == 'e' &&
- IsADigit(context.chPrev) &&
- (IsADigit(context.chNext) ||
- context.chNext == '+' ||
- context.chNext == '-');
- bool signPart =
- (context.ch == '-' || context.ch == '+') &&
- ((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) ||
- ((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev))
- && IsADigit(context.chNext)));
- bool adjacentDigit =
- IsADigit(context.ch) && IsADigit(context.chPrev);
- bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e';
- bool dotPart = context.ch == '.' &&
- IsADigit(context.chPrev) &&
- IsADigit(context.chNext);
- bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
- if (numberStart ||
- exponentPart ||
- signPart ||
- adjacentDigit ||
- dotPart ||
- afterExponent ||
- afterDot) {
- context.SetState(SCE_JSON_NUMBER);
- } else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
- context.SetState(SCE_JSON_ERROR);
- }
- }
- context.Forward();
- }
- context.Complete();
- }
- void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos,
- Sci_Position length,
- int,
- IDocument *pAccess) {
- if (!options.fold) {
- return;
- }
- LexAccessor styler(pAccess);
- Sci_PositionU currLine = styler.GetLine(startPos);
- Sci_PositionU endPos = startPos + length;
- int currLevel = SC_FOLDLEVELBASE;
- if (currLine > 0)
- currLevel = styler.LevelAt(currLine - 1) >> 16;
- int nextLevel = currLevel;
- int visibleChars = 0;
- for (Sci_PositionU i = startPos; i < endPos; i++) {
- char curr = styler.SafeGetCharAt(i);
- char next = styler.SafeGetCharAt(i+1);
- bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
- if (styler.StyleAt(i) == SCE_JSON_OPERATOR) {
- if (curr == '{' || curr == '[') {
- nextLevel++;
- } else if (curr == '}' || curr == ']') {
- nextLevel--;
- }
- }
- if (atEOL || i == (endPos-1)) {
- int level = currLevel | nextLevel << 16;
- if (!visibleChars && options.foldCompact) {
- level |= SC_FOLDLEVELWHITEFLAG;
- } else if (nextLevel > currLevel) {
- level |= SC_FOLDLEVELHEADERFLAG;
- }
- if (level != styler.LevelAt(currLine)) {
- styler.SetLevel(currLine, level);
- }
- currLine++;
- currLevel = nextLevel;
- visibleChars = 0;
- }
- if (!isspacechar(curr)) {
- visibleChars++;
- }
- }
- }
- LexerModule lmJSON(SCLEX_JSON,
- LexerJSON::LexerFactoryJSON,
- "json",
- JSONWordListDesc);
|