// LexEDIFACT.cpp (8.7 KB)
  1. // Scintilla Lexer for EDIFACT
  2. // Written by Iain Clarke, IMCSoft & Inobiz AB.
  3. // EDIFACT documented here: https://www.unece.org/cefact/edifact/welcome.html
  4. // and more readably here: https://en.wikipedia.org/wiki/EDIFACT
  5. // This code is subject to the same license terms as the rest of the scintilla project:
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. //
  8. // Header order must match order in scripts/HeaderOrder.txt
  9. #include <cstdlib>
  10. #include <cassert>
  11. #include <cstring>
  12. #include <cctype>
  13. #include "ILexer.h"
  14. #include "Scintilla.h"
  15. #include "SciLexer.h"
  16. #include "LexAccessor.h"
  17. #include "LexerModule.h"
  18. #ifdef SCI_NAMESPACE
  19. using namespace Scintilla;
  20. #endif
  21. class LexerEDIFACT : public ILexer
  22. {
  23. public:
  24. LexerEDIFACT();
  25. virtual ~LexerEDIFACT() {} // virtual destructor, as we inherit from ILexer
  26. static ILexer *Factory() {
  27. return new LexerEDIFACT;
  28. }
  29. virtual int SCI_METHOD Version() const
  30. {
  31. return lvOriginal;
  32. }
  33. virtual void SCI_METHOD Release()
  34. {
  35. delete this;
  36. }
  37. const char * SCI_METHOD PropertyNames()
  38. {
  39. return "fold";
  40. }
  41. int SCI_METHOD PropertyType(const char *)
  42. {
  43. return SC_TYPE_BOOLEAN; // Only one property!
  44. }
  45. const char * SCI_METHOD DescribeProperty(const char *name)
  46. {
  47. if (strcmp(name, "fold"))
  48. return NULL;
  49. return "Whether to apply folding to document or not";
  50. }
  51. virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val)
  52. {
  53. if (strcmp(key, "fold"))
  54. return -1;
  55. m_bFold = strcmp(val, "0") ? true : false;
  56. return 0;
  57. }
  58. const char * SCI_METHOD DescribeWordListSets()
  59. {
  60. return NULL;
  61. }
  62. virtual Sci_Position SCI_METHOD WordListSet(int, const char *)
  63. {
  64. return -1;
  65. }
  66. virtual void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
  67. virtual void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess);
  68. virtual void * SCI_METHOD PrivateCall(int, void *)
  69. {
  70. return NULL;
  71. }
  72. protected:
  73. Sci_Position InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength);
  74. Sci_Position FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const;
  75. Sci_Position ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const;
  76. int DetectSegmentHeader(char SegmentHeader[3]) const;
  77. bool m_bFold;
  78. char m_chComponent;
  79. char m_chData;
  80. char m_chDecimal;
  81. char m_chRelease;
  82. char m_chSegment;
  83. };
  84. LexerModule lmEDIFACT(SCLEX_EDIFACT, LexerEDIFACT::Factory, "edifact");
  85. ///////////////////////////////////////////////////////////////////////////////
  86. ///////////////////////////////////////////////////////////////////////////////
  87. LexerEDIFACT::LexerEDIFACT()
  88. {
  89. m_bFold = false;
  90. m_chComponent = ':';
  91. m_chData = '+';
  92. m_chDecimal = '.';
  93. m_chRelease = '?';
  94. m_chSegment = '\'';
  95. }
  96. void LexerEDIFACT::Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
  97. {
  98. Sci_PositionU posFinish = startPos + lengthDoc;
  99. InitialiseFromUNA(pAccess, posFinish);
  100. // Look backwards for a ' or a document beginning
  101. Sci_PositionU posCurrent = FindPreviousEnd(pAccess, startPos);
  102. // And jump past the ' if this was not the beginning of the document
  103. if (posCurrent != 0)
  104. posCurrent++;
  105. // Style buffer, so we're not issuing loads of notifications
  106. LexAccessor styler (pAccess);
  107. pAccess->StartStyling(posCurrent, '\377');
  108. styler.StartSegment(posCurrent);
  109. Sci_Position posSegmentStart = -1;
  110. while ((posCurrent < posFinish) && (posSegmentStart == -1))
  111. {
  112. posCurrent = ForwardPastWhitespace(pAccess, posCurrent, posFinish);
  113. // Mark whitespace as default
  114. styler.ColourTo(posCurrent - 1, SCE_EDI_DEFAULT);
  115. if (posCurrent >= posFinish)
  116. break;
  117. // Does is start with 3 charaters? ie, UNH
  118. char SegmentHeader[4] = { 0 };
  119. pAccess->GetCharRange(SegmentHeader, posCurrent, 3);
  120. int SegmentStyle = DetectSegmentHeader(SegmentHeader);
  121. if (SegmentStyle == SCE_EDI_BADSEGMENT)
  122. break;
  123. if (SegmentStyle == SCE_EDI_UNA)
  124. {
  125. posCurrent += 9;
  126. styler.ColourTo(posCurrent - 1, SCE_EDI_UNA); // UNA
  127. continue;
  128. }
  129. posSegmentStart = posCurrent;
  130. posCurrent += 3;
  131. styler.ColourTo(posCurrent - 1, SegmentStyle); // UNH etc
  132. // Colour in the rest of the segment
  133. for (char c; posCurrent < posFinish; posCurrent++)
  134. {
  135. pAccess->GetCharRange(&c, posCurrent, 1);
  136. if (c == m_chRelease) // ? escape character, check first, in case of ?'
  137. posCurrent++;
  138. else if (c == m_chSegment) // '
  139. {
  140. // Make sure the whole segment is on one line. styler won't let us go back in time, so we'll settle for marking the ' as bad.
  141. Sci_Position lineSegmentStart = pAccess->LineFromPosition(posSegmentStart);
  142. Sci_Position lineSegmentEnd = pAccess->LineFromPosition(posCurrent);
  143. if (lineSegmentStart == lineSegmentEnd)
  144. styler.ColourTo(posCurrent, SCE_EDI_SEGMENTEND);
  145. else
  146. styler.ColourTo(posCurrent, SCE_EDI_BADSEGMENT);
  147. posSegmentStart = -1;
  148. posCurrent++;
  149. break;
  150. }
  151. else if (c == m_chComponent) // :
  152. styler.ColourTo(posCurrent, SCE_EDI_SEP_COMPOSITE);
  153. else if (c == m_chData) // +
  154. styler.ColourTo(posCurrent, SCE_EDI_SEP_ELEMENT);
  155. else
  156. styler.ColourTo(posCurrent, SCE_EDI_DEFAULT);
  157. }
  158. }
  159. styler.Flush();
  160. if (posSegmentStart == -1)
  161. return;
  162. pAccess->StartStyling(posSegmentStart, -1);
  163. pAccess->SetStyleFor(posFinish - posSegmentStart, SCE_EDI_BADSEGMENT);
  164. }
  165. void LexerEDIFACT::Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int, IDocument *pAccess)
  166. {
  167. if (!m_bFold)
  168. return;
  169. // Fold at UNx lines. ie, UNx segments = 0, other segments = 1.
  170. // There's no sub folding, so we can be quite simple.
  171. Sci_Position endPos = startPos + lengthDoc;
  172. char SegmentHeader[4] = { 0 };
  173. int iIndentPrevious = 0;
  174. Sci_Position lineLast = pAccess->LineFromPosition(endPos);
  175. for (Sci_Position lineCurrent = pAccess->LineFromPosition(startPos); lineCurrent <= lineLast; lineCurrent++)
  176. {
  177. Sci_Position posLineStart = pAccess->LineStart(lineCurrent);
  178. posLineStart = ForwardPastWhitespace(pAccess, posLineStart, endPos);
  179. Sci_Position lineDataStart = pAccess->LineFromPosition(posLineStart);
  180. // Fill in whitespace lines?
  181. for (; lineCurrent < lineDataStart; lineCurrent++)
  182. pAccess->SetLevel(lineCurrent, SC_FOLDLEVELBASE | SC_FOLDLEVELWHITEFLAG | iIndentPrevious);
  183. pAccess->GetCharRange(SegmentHeader, posLineStart, 3);
  184. //if (DetectSegmentHeader(SegmentHeader) == SCE_EDI_BADSEGMENT) // Abort if this is not a proper segment header
  185. int level = 0;
  186. if (memcmp(SegmentHeader, "UNH", 3) == 0) // UNH starts blocks
  187. level = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
  188. // Check for UNA,B and Z. All others are inside messages
  189. else if (!memcmp(SegmentHeader, "UNA", 3) || !memcmp(SegmentHeader, "UNB", 3) || !memcmp(SegmentHeader, "UNZ", 3))
  190. level = SC_FOLDLEVELBASE;
  191. else
  192. level = SC_FOLDLEVELBASE | 1;
  193. pAccess->SetLevel(lineCurrent, level);
  194. iIndentPrevious = level & SC_FOLDLEVELNUMBERMASK;
  195. }
  196. }
  197. Sci_Position LexerEDIFACT::InitialiseFromUNA(IDocument *pAccess, Sci_PositionU MaxLength)
  198. {
  199. MaxLength -= 9; // drop 9 chars, to give us room for UNA:+.? '
  200. Sci_PositionU startPos = 0;
  201. startPos += ForwardPastWhitespace(pAccess, 0, MaxLength);
  202. if (startPos < MaxLength)
  203. {
  204. char bufUNA[9];
  205. pAccess->GetCharRange(bufUNA, startPos, 9);
  206. // Check it's UNA segment
  207. if (!memcmp(bufUNA, "UNA", 3))
  208. {
  209. m_chComponent = bufUNA[3];
  210. m_chData = bufUNA[4];
  211. m_chDecimal = bufUNA[5];
  212. m_chRelease = bufUNA[6];
  213. // bufUNA [7] should be space - reserved.
  214. m_chSegment = bufUNA[8];
  215. return 0; // success!
  216. }
  217. }
  218. // We failed to find a UNA, so drop to defaults
  219. m_chComponent = ':';
  220. m_chData = '+';
  221. m_chDecimal = '.';
  222. m_chRelease = '?';
  223. m_chSegment = '\'';
  224. return -1;
  225. }
  226. Sci_Position LexerEDIFACT::ForwardPastWhitespace(IDocument *pAccess, Sci_Position startPos, Sci_Position MaxLength) const
  227. {
  228. char c;
  229. while (startPos < MaxLength)
  230. {
  231. pAccess->GetCharRange(&c, startPos, 1);
  232. switch (c)
  233. {
  234. case '\t':
  235. case '\r':
  236. case '\n':
  237. case ' ':
  238. break;
  239. default:
  240. return startPos;
  241. }
  242. startPos++;
  243. }
  244. return MaxLength;
  245. }
  246. int LexerEDIFACT::DetectSegmentHeader(char SegmentHeader[3]) const
  247. {
  248. if (
  249. SegmentHeader[0] < 'A' || SegmentHeader[0] > 'Z' ||
  250. SegmentHeader[1] < 'A' || SegmentHeader[1] > 'Z' ||
  251. SegmentHeader[2] < 'A' || SegmentHeader[2] > 'Z')
  252. return SCE_EDI_BADSEGMENT;
  253. if (memcmp(SegmentHeader, "UNA", 3) == 0)
  254. return SCE_EDI_UNA;
  255. if (memcmp(SegmentHeader, "UNH", 3) == 0)
  256. return SCE_EDI_UNH;
  257. return SCE_EDI_SEGMENTSTART;
  258. }
  259. // Look backwards for a ' or a document beginning
  260. Sci_Position LexerEDIFACT::FindPreviousEnd(IDocument *pAccess, Sci_Position startPos) const
  261. {
  262. for (char c; startPos > 0; startPos--)
  263. {
  264. pAccess->GetCharRange(&c, startPos, 1);
  265. if (c == m_chSegment)
  266. return startPos;
  267. }
  268. // We didn't find a ', so just go with the beginning
  269. return 0;
  270. }