LexPerl.cpp 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810
  1. // Scintilla source code edit control
  2. /** @file LexPerl.cxx
  3. ** Lexer for Perl.
  4. ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
  5. **/
  6. // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
  7. // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
  8. // The License.txt file describes the conditions under which this software may be distributed.
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <stdio.h>
  12. #include <stdarg.h>
  13. #include <assert.h>
  14. #include <ctype.h>
  15. #include <string>
  16. #include <map>
  17. #include "ILexer.h"
  18. #include "Scintilla.h"
  19. #include "SciLexer.h"
  20. #include "WordList.h"
  21. #include "LexAccessor.h"
  22. #include "StyleContext.h"
  23. #include "CharacterSet.h"
  24. #include "LexerModule.h"
  25. #include "OptionSet.h"
  26. #ifdef SCI_NAMESPACE
  27. using namespace Scintilla;
  28. #endif
  29. // Info for HERE document handling from perldata.pod (reformatted):
  30. // ----------------------------------------------------------------
  31. // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
  32. // Following a << you specify a string to terminate the quoted material, and
  33. // all lines following the current line down to the terminating string are
  34. // the value of the item.
  35. // * The terminating string may be either an identifier (a word), or some
  36. // quoted text.
  37. // * If quoted, the type of quotes you use determines the treatment of the
  38. // text, just as in regular quoting.
  39. // * An unquoted identifier works like double quotes.
  40. // * There must be no space between the << and the identifier.
  41. // (If you put a space it will be treated as a null identifier,
  42. // which is valid, and matches the first empty line.)
  43. // (This is deprecated, -w warns of this syntax)
  44. // * The terminating string must appear by itself (unquoted and
  45. // with no surrounding whitespace) on the terminating line.
  // ---- HERE document handling ----
  #define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
  // ---- sub-states used while lexing a number (see actualNumStyle()) ----
  #define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
  #define PERLNUM_OCTAL 2
  #define PERLNUM_FLOAT_EXP 3 // exponent part only
  #define PERLNUM_HEX 4 // may be a hex float
  #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
  #define PERLNUM_VECTOR 6
  #define PERLNUM_V_VECTOR 7
  #define PERLNUM_BAD 8
  // ---- backFlag values passed to disambiguateBareword() ----
  #define BACK_NONE 0 // lookback state for bareword disambiguation:
  #define BACK_OPERATOR 1 // whitespace/comments are insignificant
  #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
  // ---- states of the backward scan in styleCheckSubPrototype() ----
  #define SUB_BEGIN 0 // states for subroutine prototype scan:
  #define SUB_HAS_PROTO 1 // only 'prototype' attribute allows prototypes
  #define SUB_HAS_ATTRIB 2 // other attributes can exist leftward
  #define SUB_HAS_MODULE 3 // sub name can have a ::identifier part
  #define SUB_HAS_SUB 4 // 'sub' keyword
  // Every interpolated ("_VAR") style differs from its parent style by the
  // same constant offset, so one shift converts in either direction.
  // SCE_PL_STRING_VAR is also assumed to be the interpolated style with the
  // smallest value, which lets "state >= SCE_PL_STRING_VAR" test for "is an
  // interpolated style".
  #define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
  66. static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
  67. // old-style keyword matcher; needed because GetCurrent() needs
  68. // current segment to be committed, but we may abandon early...
  69. char s[100];
  70. Sci_PositionU i, len = end - start;
  71. if (len > 30) { len = 30; }
  72. for (i = 0; i < len; i++, start++) s[i] = styler[start];
  73. s[i] = '\0';
  74. return keywords.InList(s);
  75. }
  76. static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
  77. int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
  78. // identifiers are recognized by Perl as barewords under some
  79. // conditions, the following attempts to do the disambiguation
  80. // by looking backward and forward; result in 2 LSB
  81. int result = 0;
  82. bool moreback = false; // true if passed newline/comments
  83. bool brace = false; // true if opening brace found
  84. // if BACK_NONE, neither operator nor keyword, so skip test
  85. if (backFlag == BACK_NONE)
  86. return result;
  87. // first look backwards past whitespace/comments to set EOL flag
  88. // (some disambiguation patterns must be on a single line)
  89. if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
  90. moreback = true;
  91. // look backwards at last significant lexed item for disambiguation
  92. bk = backPos - 1;
  93. int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
  94. if (ch == '{' && !moreback) {
  95. // {bareword: possible variable spec
  96. brace = true;
  97. } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
  98. // &bareword: subroutine call
  99. || styler.Match(bk - 1, "->")
  100. // ->bareword: part of variable spec
  101. || styler.Match(bk - 1, "::")
  102. // ::bareword: part of module spec
  103. || styler.Match(bk - 2, "sub")) {
  104. // sub bareword: subroutine declaration
  105. // (implied BACK_KEYWORD, no keywords end in 'sub'!)
  106. result |= 1;
  107. }
  108. // next, scan forward after word past tab/spaces only;
  109. // if ch isn't one of '[{(,' we can skip the test
  110. if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
  111. && fw < endPos) {
  112. while (ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)),
  113. IsASpaceOrTab(ch) && fw < endPos) {
  114. fw++;
  115. }
  116. if ((ch == '}' && brace)
  117. // {bareword}: variable spec
  118. || styler.Match(fw, "=>")) {
  119. // [{(, bareword=>: hash literal
  120. result |= 2;
  121. }
  122. }
  123. return result;
  124. }
  125. static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
  126. // when backtracking, we need to skip whitespace and comments
  127. int style;
  128. while ((p > 0) && (style = styler.StyleAt(p),
  129. style == SCE_PL_DEFAULT || style == SCE_PL_COMMENTLINE))
  130. p--;
  131. }
  132. static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
  133. // scan backward past whitespace and comments to find a lexeme
  134. skipWhitespaceComment(styler, bk);
  135. if (bk == 0)
  136. return 0;
  137. int sz = 1;
  138. style = styler.StyleAt(bk);
  139. while (bk > 0) { // find extent of lexeme
  140. if (styler.StyleAt(bk - 1) == style) {
  141. bk--; sz++;
  142. } else
  143. break;
  144. }
  145. return sz;
  146. }
  147. static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
  148. // backtrack to find open '{' corresponding to a '}', balanced
  149. // return significant style to be tested for '/' disambiguation
  150. int braceCount = 1;
  151. if (bk == 0)
  152. return SCE_PL_DEFAULT;
  153. while (--bk > 0) {
  154. if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
  155. int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
  156. if (bkch == ';') { // early out
  157. break;
  158. } else if (bkch == '}') {
  159. braceCount++;
  160. } else if (bkch == '{') {
  161. if (--braceCount == 0) break;
  162. }
  163. }
  164. }
  165. if (bk > 0 && braceCount == 0) {
  166. // balanced { found, bk > 0, skip more whitespace/comments
  167. bk--;
  168. skipWhitespaceComment(styler, bk);
  169. return styler.StyleAt(bk);
  170. }
  171. return SCE_PL_DEFAULT;
  172. }
  173. static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
  174. // backtrack to classify sub-styles of identifier under test
  175. // return sub-style to be tested for '/' disambiguation
  176. if (styler.SafeGetCharAt(bk) == '>') // inputsymbol, like <foo>
  177. return 1;
  178. // backtrack to check for possible "->" or "::" before identifier
  179. while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
  180. bk--;
  181. }
  182. while (bk > 0) {
  183. int bkstyle = styler.StyleAt(bk);
  184. if (bkstyle == SCE_PL_DEFAULT
  185. || bkstyle == SCE_PL_COMMENTLINE) {
  186. // skip whitespace, comments
  187. } else if (bkstyle == SCE_PL_OPERATOR) {
  188. // test for "->" and "::"
  189. if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
  190. return 2;
  191. } else
  192. return 3; // bare identifier
  193. bk--;
  194. }
  195. return 0;
  196. }
  197. static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
  198. // forward scan the current line to classify line for POD style
  199. int state = -1;
  200. while (pos < endPos) {
  201. int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
  202. if (ch == '\n' || ch == '\r') {
  203. if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
  204. break;
  205. }
  206. if (IsASpaceOrTab(ch)) { // whitespace, take note
  207. if (state == -1)
  208. state = SCE_PL_DEFAULT;
  209. } else if (state == SCE_PL_DEFAULT) { // verbatim POD line
  210. state = SCE_PL_POD_VERB;
  211. } else if (state != SCE_PL_POD_VERB) { // regular POD line
  212. state = SCE_PL_POD;
  213. }
  214. pos++;
  215. }
  216. if (state == -1)
  217. state = SCE_PL_DEFAULT;
  218. return state;
  219. }
  220. static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
  221. // backtrack to identify if we're starting a subroutine prototype
  222. // we also need to ignore whitespace/comments, format is like:
  223. // sub abc::pqr :const :prototype(...)
  224. // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
  225. // and a state machine generates legal subroutine syntax matches
  226. styler.Flush();
  227. int state = SUB_BEGIN;
  228. do {
  229. // find two lexemes, lexeme 2 follows lexeme 1
  230. int style2 = SCE_PL_DEFAULT;
  231. Sci_PositionU pos2 = bk;
  232. int len2 = findPrevLexeme(styler, pos2, style2);
  233. int style1 = SCE_PL_DEFAULT;
  234. Sci_PositionU pos1 = pos2;
  235. if (pos1 > 0) pos1--;
  236. int len1 = findPrevLexeme(styler, pos1, style1);
  237. if (len1 == 0 || len2 == 0) // lexeme pair must exist
  238. break;
  239. // match parts of syntax, if invalid subroutine syntax, break off
  240. if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
  241. styler.SafeGetCharAt(pos1) == ':') { // ':'
  242. if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
  243. if (len2 == 9 && styler.Match(pos2, "prototype")) { // ':' 'prototype'
  244. if (state == SUB_BEGIN) {
  245. state = SUB_HAS_PROTO;
  246. } else
  247. break;
  248. } else { // ':' <attribute>
  249. if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
  250. state = SUB_HAS_ATTRIB;
  251. } else
  252. break;
  253. }
  254. } else
  255. break;
  256. } else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
  257. styler.Match(pos1, "::")) { // '::'
  258. if (style2 == SCE_PL_IDENTIFIER) { // '::' <identifier>
  259. state = SUB_HAS_MODULE;
  260. } else
  261. break;
  262. } else if (style1 == SCE_PL_WORD && len1 == 3 &&
  263. styler.Match(pos1, "sub")) { // 'sub'
  264. if (style2 == SCE_PL_IDENTIFIER) { // 'sub' <identifier>
  265. state = SUB_HAS_SUB;
  266. } else
  267. break;
  268. } else
  269. break;
  270. bk = pos1; // set position for finding next lexeme pair
  271. if (bk > 0) bk--;
  272. } while (state != SUB_HAS_SUB);
  273. return (state == SUB_HAS_SUB);
  274. }
  275. static int actualNumStyle(int numberStyle) {
  276. if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
  277. return SCE_PL_STRING;
  278. } else if (numberStyle == PERLNUM_BAD) {
  279. return SCE_PL_ERROR;
  280. }
  281. return SCE_PL_NUMBER;
  282. }
  // Return the closing delimiter that pairs with an opening bracket
  // character; any other character is its own closing delimiter.
  static int opposite(int ch) {
      switch (ch) {
      case '(':
          return ')';
      case '[':
          return ']';
      case '{':
          return '}';
      case '<':
          return '>';
      default:
          return ch;
      }
  }
  290. static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
  291. Sci_Position pos = styler.LineStart(line);
  292. Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
  293. for (Sci_Position i = pos; i < eol_pos; i++) {
  294. char ch = styler[i];
  295. int style = styler.StyleAt(i);
  296. if (ch == '#' && style == SCE_PL_COMMENTLINE)
  297. return true;
  298. else if (!IsASpaceOrTab(ch))
  299. return false;
  300. }
  301. return false;
  302. }
  303. static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
  304. Sci_Position pos = styler.LineStart(line);
  305. int style = styler.StyleAt(pos);
  306. if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
  307. return true;
  308. }
  309. return false;
  310. }
  311. static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
  312. int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
  313. if (lvl >= '1' && lvl <= '4') {
  314. return lvl - '0';
  315. }
  316. return 0;
  317. }
  // Options used for LexerPerl; each member is bound to a property name in
  // OptionSetPerl.
  struct OptionsPerl {
      bool fold;                // "fold"
      bool foldComment;         // "fold.comment"
      bool foldCompact;         // "fold.compact"
      // Custom folding of POD and packages
      bool foldPOD;             // "fold.perl.pod": fold Pod blocks
      bool foldPackage;         // "fold.perl.package": fold packages
      bool foldCommentExplicit; // "fold.perl.comment.explicit"
      bool foldAtElse;          // "fold.perl.at.else"

      // Defaults: POD, package, compact and explicit-comment folding are on;
      // everything else is off.
      OptionsPerl() :
          fold(false),
          foldComment(false),
          foldCompact(true),
          foldPOD(true),
          foldPackage(true),
          foldCommentExplicit(true),
          foldAtElse(false) {
      }
  };
  // Descriptions of the word lists accepted by the lexer; the array is
  // terminated by a null entry.
  static const char *const perlWordListDesc[] = { "Keywords", 0 };
  345. struct OptionSetPerl : public OptionSet<OptionsPerl> {
  346. OptionSetPerl() {
  347. DefineProperty("fold", &OptionsPerl::fold);
  348. DefineProperty("fold.comment", &OptionsPerl::foldComment);
  349. DefineProperty("fold.compact", &OptionsPerl::foldCompact);
  350. DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
  351. "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
  352. DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
  353. "Set to 0 to disable folding packages when using the Perl lexer.");
  354. DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
  355. "Set to 0 to disable explicit folding.");
  356. DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
  357. "This option enables Perl folding on a \"} else {\" line of an if statement.");
  358. DefineWordListSets(perlWordListDesc);
  359. }
  360. };
  361. class LexerPerl : public ILexer {
  362. CharacterSet setWordStart;
  363. CharacterSet setWord;
  364. CharacterSet setSpecialVar;
  365. CharacterSet setControlVar;
  366. WordList keywords;
  367. OptionsPerl options;
  368. OptionSetPerl osPerl;
  369. public:
  370. LexerPerl() :
  371. setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
  372. setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
  373. setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
  374. setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
  375. }
  376. virtual ~LexerPerl() {
  377. }
  378. void SCI_METHOD Release() {
  379. delete this;
  380. }
  381. int SCI_METHOD Version() const {
  382. return lvOriginal;
  383. }
  384. const char *SCI_METHOD PropertyNames() {
  385. return osPerl.PropertyNames();
  386. }
  387. int SCI_METHOD PropertyType(const char *name) {
  388. return osPerl.PropertyType(name);
  389. }
  390. const char *SCI_METHOD DescribeProperty(const char *name) {
  391. return osPerl.DescribeProperty(name);
  392. }
  393. Sci_Position SCI_METHOD PropertySet(const char *key, const char *val);
  394. const char *SCI_METHOD DescribeWordListSets() {
  395. return osPerl.DescribeWordListSets();
  396. }
  397. Sci_Position SCI_METHOD WordListSet(int n, const char *wl);
  398. void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  399. void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess);
  400. void *SCI_METHOD PrivateCall(int, void *) {
  401. return 0;
  402. }
  403. static ILexer *LexerFactoryPerl() {
  404. return new LexerPerl();
  405. }
  406. int InputSymbolScan(StyleContext &sc);
  407. void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
  408. };
  409. Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
  410. if (osPerl.PropertySet(&options, key, val)) {
  411. return 0;
  412. }
  413. return -1;
  414. }
  415. Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
  416. WordList *wordListN = 0;
  417. switch (n) {
  418. case 0:
  419. wordListN = &keywords;
  420. break;
  421. }
  422. Sci_Position firstModification = -1;
  423. if (wordListN) {
  424. WordList wlNew;
  425. wlNew.Set(wl);
  426. if (*wordListN != wlNew) {
  427. wordListN->Set(wl);
  428. firstModification = 0;
  429. }
  430. }
  431. return firstModification;
  432. }
  433. int LexerPerl::InputSymbolScan(StyleContext &sc) {
  434. // forward scan for matching > on same line; file handles
  435. int c, sLen = 0;
  436. while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
  437. if (c == '\r' || c == '\n') {
  438. return 0;
  439. } else if (c == '>') {
  440. if (sc.Match("<=>")) // '<=>' case
  441. return 0;
  442. return sLen;
  443. }
  444. }
  445. return 0;
  446. }
  447. void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
  448. // interpolate a segment (with no active backslashes or delimiters within)
  449. // switch in or out of an interpolation style or continue current style
  450. // commit variable patterns if found, trim segment, repeat until done
  451. while (maxSeg > 0) {
  452. bool isVar = false;
  453. int sLen = 0;
  454. if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
  455. // $#[$]*word [$@][$]*word (where word or {word} is always present)
  456. bool braces = false;
  457. sLen = 1;
  458. if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
  459. sLen++;
  460. }
  461. while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) // >0 $ dereference within
  462. sLen++;
  463. if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) { // { start for {word}
  464. sLen++;
  465. braces = true;
  466. }
  467. if (maxSeg > sLen) {
  468. int c = sc.GetRelativeCharacter(sLen);
  469. if (setWordStart.Contains(c)) { // word (various)
  470. sLen++;
  471. isVar = true;
  472. while (maxSeg > sLen) {
  473. if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
  474. break;
  475. sLen++;
  476. }
  477. } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit}
  478. sLen++;
  479. isVar = true;
  480. }
  481. }
  482. if (braces) {
  483. if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) { // } end for {word}
  484. sLen++;
  485. } else
  486. isVar = false;
  487. }
  488. }
  489. if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns
  490. int c = sc.chNext;
  491. if (sc.ch == '$') {
  492. sLen = 1;
  493. if (IsADigit(c)) { // $[0-9] and slurp trailing digits
  494. sLen++;
  495. isVar = true;
  496. while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
  497. sLen++;
  498. } else if (setSpecialVar.Contains(c)) { // $ special variables
  499. sLen++;
  500. isVar = true;
  501. } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) { // $ additional
  502. sLen++;
  503. isVar = true;
  504. } else if (c == '^') { // $^A control-char style
  505. sLen++;
  506. if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
  507. sLen++;
  508. isVar = true;
  509. }
  510. }
  511. } else if (sc.ch == '@') {
  512. sLen = 1;
  513. if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
  514. sLen++;
  515. isVar = true;
  516. }
  517. }
  518. }
  519. if (isVar) { // commit as interpolated variable or normal character
  520. if (sc.state < SCE_PL_STRING_VAR)
  521. sc.SetState(sc.state + INTERPOLATE_SHIFT);
  522. sc.Forward(sLen);
  523. maxSeg -= sLen;
  524. } else {
  525. if (sc.state >= SCE_PL_STRING_VAR)
  526. sc.SetState(sc.state - INTERPOLATE_SHIFT);
  527. sc.Forward();
  528. maxSeg--;
  529. }
  530. }
  531. if (sc.state >= SCE_PL_STRING_VAR)
  532. sc.SetState(sc.state - INTERPOLATE_SHIFT);
  533. }
  534. void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
  535. LexAccessor styler(pAccess);
  536. // keywords that forces /PATTERN/ at all times; should track vim's behaviour
  537. WordList reWords;
  538. reWords.Set("elsif if split while");
  539. // charset classes
  540. CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
  541. // lexing of "%*</" operators is non-trivial; these are missing in the set below
  542. CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
  543. CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
  544. CharacterSet setModifiers(CharacterSet::setAlpha);
  545. CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
  546. // setArray and setHash also accepts chars for special vars like $_,
  547. // which are then truncated when the next char does not match setVar
  548. CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
  549. CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
  550. CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
  551. CharacterSet &setPOD = setModifiers;
  552. CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
  553. CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
  554. CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
  555. CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
  556. // for format identifiers
  557. CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
  558. CharacterSet &setFormat = setHereDocDelim;
  559. // Lexer for perl often has to backtrack to start of current style to determine
  560. // which characters are being used as quotes, how deeply nested is the
  561. // start position and what the termination string is for HERE documents.
  562. class HereDocCls { // Class to manage HERE doc sequence
  563. public:
  564. int State;
  565. // 0: '<<' encountered
  566. // 1: collect the delimiter
  567. // 2: here doc text (lines after the delimiter)
  568. int Quote; // the char after '<<'
  569. bool Quoted; // true if Quote in ('\'','"','`')
  570. int DelimiterLength; // strlen(Delimiter)
  571. char Delimiter[HERE_DELIM_MAX]; // the Delimiter
  572. HereDocCls() {
  573. State = 0;
  574. Quote = 0;
  575. Quoted = false;
  576. DelimiterLength = 0;
  577. Delimiter[0] = '\0';
  578. }
  579. void Append(int ch) {
  580. Delimiter[DelimiterLength++] = static_cast<char>(ch);
  581. Delimiter[DelimiterLength] = '\0';
  582. }
  583. ~HereDocCls() {
  584. }
  585. };
  586. HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
  587. class QuoteCls { // Class to manage quote pairs
  588. public:
  589. int Rep;
  590. int Count;
  591. int Up, Down;
  592. QuoteCls() {
  593. New(1);
  594. }
  595. void New(int r = 1) {
  596. Rep = r;
  597. Count = 0;
  598. Up = '\0';
  599. Down = '\0';
  600. }
  601. void Open(int u) {
  602. Count++;
  603. Up = u;
  604. Down = opposite(Up);
  605. }
  606. };
  607. QuoteCls Quote;
  608. // additional state for number lexing
  609. int numState = PERLNUM_DECIMAL;
  610. int dotCount = 0;
  611. Sci_PositionU endPos = startPos + length;
  612. // Backtrack to beginning of style if required...
  613. // If in a long distance lexical state, backtrack to find quote characters.
  614. // Includes strings (may be multi-line), numbers (additional state), format
  615. // bodies, as well as POD sections.
  616. if (initStyle == SCE_PL_HERE_Q
  617. || initStyle == SCE_PL_HERE_QQ
  618. || initStyle == SCE_PL_HERE_QX
  619. || initStyle == SCE_PL_FORMAT
  620. || initStyle == SCE_PL_HERE_QQ_VAR
  621. || initStyle == SCE_PL_HERE_QX_VAR
  622. ) {
  623. // backtrack through multiple styles to reach the delimiter start
  624. int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
  625. while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
  626. startPos--;
  627. }
  628. startPos = styler.LineStart(styler.GetLine(startPos));
  629. initStyle = styler.StyleAt(startPos - 1);
  630. }
  631. if (initStyle == SCE_PL_STRING
  632. || initStyle == SCE_PL_STRING_QQ
  633. || initStyle == SCE_PL_BACKTICKS
  634. || initStyle == SCE_PL_STRING_QX
  635. || initStyle == SCE_PL_REGEX
  636. || initStyle == SCE_PL_STRING_QR
  637. || initStyle == SCE_PL_REGSUBST
  638. || initStyle == SCE_PL_STRING_VAR
  639. || initStyle == SCE_PL_STRING_QQ_VAR
  640. || initStyle == SCE_PL_BACKTICKS_VAR
  641. || initStyle == SCE_PL_STRING_QX_VAR
  642. || initStyle == SCE_PL_REGEX_VAR
  643. || initStyle == SCE_PL_STRING_QR_VAR
  644. || initStyle == SCE_PL_REGSUBST_VAR
  645. ) {
  646. // for interpolation, must backtrack through a mix of two different styles
  647. int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
  648. initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
  649. while (startPos > 1) {
  650. int st = styler.StyleAt(startPos - 1);
  651. if ((st != initStyle) && (st != otherStyle))
  652. break;
  653. startPos--;
  654. }
  655. initStyle = SCE_PL_DEFAULT;
  656. } else if (initStyle == SCE_PL_STRING_Q
  657. || initStyle == SCE_PL_STRING_QW
  658. || initStyle == SCE_PL_XLAT
  659. || initStyle == SCE_PL_CHARACTER
  660. || initStyle == SCE_PL_NUMBER
  661. || initStyle == SCE_PL_IDENTIFIER
  662. || initStyle == SCE_PL_ERROR
  663. || initStyle == SCE_PL_SUB_PROTOTYPE
  664. ) {
  665. while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
  666. startPos--;
  667. }
  668. initStyle = SCE_PL_DEFAULT;
  669. } else if (initStyle == SCE_PL_POD
  670. || initStyle == SCE_PL_POD_VERB
  671. ) {
  672. // POD backtracking finds preceding blank lines and goes back past them
  673. Sci_Position ln = styler.GetLine(startPos);
  674. if (ln > 0) {
  675. initStyle = styler.StyleAt(styler.LineStart(--ln));
  676. if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
  677. while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
  678. ln--;
  679. }
  680. startPos = styler.LineStart(++ln);
  681. initStyle = styler.StyleAt(startPos - 1);
  682. } else {
  683. startPos = 0;
  684. initStyle = SCE_PL_DEFAULT;
  685. }
  686. }
  687. // backFlag, backPos are additional state to aid identifier corner cases.
  688. // Look backwards past whitespace and comments in order to detect either
  689. // operator or keyword. Later updated as we go along.
  690. int backFlag = BACK_NONE;
  691. Sci_PositionU backPos = startPos;
  692. if (backPos > 0) {
  693. backPos--;
  694. skipWhitespaceComment(styler, backPos);
  695. if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
  696. backFlag = BACK_OPERATOR;
  697. else if (styler.StyleAt(backPos) == SCE_PL_WORD)
  698. backFlag = BACK_KEYWORD;
  699. backPos++;
  700. }
  701. StyleContext sc(startPos, endPos - startPos, initStyle, styler);
  702. for (; sc.More(); sc.Forward()) {
  703. // Determine if the current state should terminate.
  704. switch (sc.state) {
  705. case SCE_PL_OPERATOR:
  706. sc.SetState(SCE_PL_DEFAULT);
  707. backFlag = BACK_OPERATOR;
  708. backPos = sc.currentPos;
  709. break;
  710. case SCE_PL_IDENTIFIER: // identifier, bareword, inputsymbol
  711. if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
  712. || sc.Match('.', '.')
  713. || sc.chPrev == '>') { // end of inputsymbol
  714. sc.SetState(SCE_PL_DEFAULT);
  715. }
  716. break;
  717. case SCE_PL_WORD: // keyword, plus special cases
  718. if (!setWord.Contains(sc.ch)) {
  719. char s[100];
  720. sc.GetCurrent(s, sizeof(s));
  721. if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
  722. sc.ChangeState(SCE_PL_DATASECTION);
  723. } else {
  724. if ((strcmp(s, "format") == 0)) {
  725. sc.SetState(SCE_PL_FORMAT_IDENT);
  726. HereDoc.State = 0;
  727. } else {
  728. sc.SetState(SCE_PL_DEFAULT);
  729. }
  730. backFlag = BACK_KEYWORD;
  731. backPos = sc.currentPos;
  732. }
  733. }
  734. break;
  735. case SCE_PL_SCALAR:
  736. case SCE_PL_ARRAY:
  737. case SCE_PL_HASH:
  738. case SCE_PL_SYMBOLTABLE:
  739. if (sc.Match(':', ':')) { // skip ::
  740. sc.Forward();
  741. } else if (!setVar.Contains(sc.ch)) {
  742. if (sc.LengthCurrent() == 1) {
  743. // Special variable: $(, $_ etc.
  744. sc.Forward();
  745. }
  746. sc.SetState(SCE_PL_DEFAULT);
  747. }
  748. break;
  749. case SCE_PL_NUMBER:
  750. // if no early break, number style is terminated at "(go through)"
  751. if (sc.ch == '.') {
  752. if (sc.chNext == '.') {
  753. // double dot is always an operator (go through)
  754. } else if (numState <= PERLNUM_FLOAT_EXP) {
  755. // non-decimal number or float exponent, consume next dot
  756. sc.SetState(SCE_PL_OPERATOR);
  757. break;
  758. } else { // decimal or vectors allows dots
  759. dotCount++;
  760. if (numState == PERLNUM_DECIMAL) {
  761. if (dotCount <= 1) // number with one dot in it
  762. break;
  763. if (IsADigit(sc.chNext)) { // really a vector
  764. numState = PERLNUM_VECTOR;
  765. break;
  766. }
  767. // number then dot (go through)
  768. } else if (numState == PERLNUM_HEX) {
  769. if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
  770. break; // hex with one dot is a hex float
  771. } else {
  772. sc.SetState(SCE_PL_OPERATOR);
  773. break;
  774. }
  775. // hex then dot (go through)
  776. } else if (IsADigit(sc.chNext)) // vectors
  777. break;
  778. // vector then dot (go through)
  779. }
  780. } else if (sc.ch == '_') {
  781. // permissive underscoring for number and vector literals
  782. break;
  783. } else if (numState == PERLNUM_DECIMAL) {
  784. if (sc.ch == 'E' || sc.ch == 'e') { // exponent, sign
  785. numState = PERLNUM_FLOAT_EXP;
  786. if (sc.chNext == '+' || sc.chNext == '-') {
  787. sc.Forward();
  788. }
  789. break;
  790. } else if (IsADigit(sc.ch))
  791. break;
  792. // number then word (go through)
  793. } else if (numState == PERLNUM_HEX) {
  794. if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign
  795. numState = PERLNUM_FLOAT_EXP;
  796. if (sc.chNext == '+' || sc.chNext == '-') {
  797. sc.Forward();
  798. }
  799. break;
  800. } else if (IsADigit(sc.ch, 16))
  801. break;
  802. // hex or hex float then word (go through)
  803. } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
  804. if (IsADigit(sc.ch)) // vector
  805. break;
  806. if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
  807. sc.ChangeState(SCE_PL_IDENTIFIER);
  808. break;
  809. }
  810. // vector then word (go through)
  811. } else if (IsADigit(sc.ch)) {
  812. if (numState == PERLNUM_FLOAT_EXP) {
  813. break;
  814. } else if (numState == PERLNUM_OCTAL) {
  815. if (sc.ch <= '7') break;
  816. } else if (numState == PERLNUM_BINARY) {
  817. if (sc.ch <= '1') break;
  818. }
  819. // mark invalid octal, binary numbers (go through)
  820. numState = PERLNUM_BAD;
  821. break;
  822. }
  823. // complete current number or vector
  824. sc.ChangeState(actualNumStyle(numState));
  825. sc.SetState(SCE_PL_DEFAULT);
  826. break;
  827. case SCE_PL_COMMENTLINE:
  828. if (sc.atLineEnd) {
  829. sc.SetState(SCE_PL_DEFAULT);
  830. }
  831. break;
  832. case SCE_PL_HERE_DELIM:
  833. if (HereDoc.State == 0) { // '<<' encountered
  834. int delim_ch = sc.chNext;
  835. Sci_Position ws_skip = 0;
  836. HereDoc.State = 1; // pre-init HERE doc class
  837. HereDoc.Quote = sc.chNext;
  838. HereDoc.Quoted = false;
  839. HereDoc.DelimiterLength = 0;
  840. HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
  841. if (IsASpaceOrTab(delim_ch)) {
  842. // skip whitespace; legal only for quoted delimiters
  843. Sci_PositionU i = sc.currentPos + 1;
  844. while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
  845. i++;
  846. delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  847. }
  848. ws_skip = i - sc.currentPos - 1;
  849. }
  850. if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
  851. // a quoted here-doc delimiter; skip any whitespace
  852. sc.Forward(ws_skip + 1);
  853. HereDoc.Quote = delim_ch;
  854. HereDoc.Quoted = true;
  855. } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
  856. || ws_skip > 0) {
  857. // left shift << or <<= operator cases
  858. // restore position if operator
  859. sc.ChangeState(SCE_PL_OPERATOR);
  860. sc.ForwardSetState(SCE_PL_DEFAULT);
  861. backFlag = BACK_OPERATOR;
  862. backPos = sc.currentPos;
  863. HereDoc.State = 0;
  864. } else {
  865. // specially handle initial '\' for identifier
  866. if (ws_skip == 0 && HereDoc.Quote == '\\')
  867. sc.Forward();
  868. // an unquoted here-doc delimiter, no special handling
  869. // (cannot be prefixed by spaces/tabs), or
  870. // symbols terminates; deprecated zero-length delimiter
  871. }
  872. } else if (HereDoc.State == 1) { // collect the delimiter
  873. backFlag = BACK_NONE;
  874. if (HereDoc.Quoted) { // a quoted here-doc delimiter
  875. if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
  876. sc.ForwardSetState(SCE_PL_DEFAULT);
  877. } else if (!sc.atLineEnd) {
  878. if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
  879. sc.Forward();
  880. }
  881. if (sc.ch != '\r') { // skip CR if CRLF
  882. int i = 0; // else append char, possibly an extended char
  883. while (i < sc.width) {
  884. HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
  885. i++;
  886. }
  887. }
  888. }
  889. } else { // an unquoted here-doc delimiter, no extended charsets
  890. if (setHereDocDelim.Contains(sc.ch)) {
  891. HereDoc.Append(sc.ch);
  892. } else {
  893. sc.SetState(SCE_PL_DEFAULT);
  894. }
  895. }
  896. if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
  897. sc.SetState(SCE_PL_ERROR);
  898. HereDoc.State = 0;
  899. }
  900. }
  901. break;
  902. case SCE_PL_HERE_Q:
  903. case SCE_PL_HERE_QQ:
  904. case SCE_PL_HERE_QX:
  905. // also implies HereDoc.State == 2
  906. sc.Complete();
  907. if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
  908. int c = sc.GetRelative(HereDoc.DelimiterLength);
  909. if (c == '\r' || c == '\n') { // peek first, do not consume match
  910. sc.ForwardBytes(HereDoc.DelimiterLength);
  911. sc.SetState(SCE_PL_DEFAULT);
  912. backFlag = BACK_NONE;
  913. HereDoc.State = 0;
  914. if (!sc.atLineEnd)
  915. sc.Forward();
  916. break;
  917. }
  918. }
  919. if (sc.state == SCE_PL_HERE_Q) { // \EOF and 'EOF' non-interpolated
  920. while (!sc.atLineEnd)
  921. sc.Forward();
  922. break;
  923. }
  924. while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated
  925. int c, sLen = 0, endType = 0;
  926. while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
  927. // scan to break string into segments
  928. if (c == '\\') {
  929. endType = 1; break;
  930. } else if (c == '\r' || c == '\n') {
  931. endType = 2; break;
  932. }
  933. sLen++;
  934. }
  935. if (sLen > 0) // process non-empty segments
  936. InterpolateSegment(sc, sLen);
  937. if (endType == 1) {
  938. sc.Forward();
  939. // \ at end-of-line does not appear to have any effect, skip
  940. if (sc.ch != '\r' && sc.ch != '\n')
  941. sc.Forward();
  942. } else if (endType == 2) {
  943. if (!sc.atLineEnd)
  944. sc.Forward();
  945. }
  946. }
  947. break;
  948. case SCE_PL_POD:
  949. case SCE_PL_POD_VERB: {
  950. Sci_PositionU fw = sc.currentPos;
  951. Sci_Position ln = styler.GetLine(fw);
  952. if (sc.atLineStart && sc.Match("=cut")) { // end of POD
  953. sc.SetState(SCE_PL_POD);
  954. sc.Forward(4);
  955. sc.SetState(SCE_PL_DEFAULT);
  956. styler.SetLineState(ln, SCE_PL_POD);
  957. break;
  958. }
  959. int pod = podLineScan(styler, fw, endPos); // classify POD line
  960. styler.SetLineState(ln, pod);
  961. if (pod == SCE_PL_DEFAULT) {
  962. if (sc.state == SCE_PL_POD_VERB) {
  963. Sci_PositionU fw2 = fw;
  964. while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
  965. fw = fw2++; // penultimate line (last blank line)
  966. pod = podLineScan(styler, fw2, endPos);
  967. styler.SetLineState(styler.GetLine(fw2), pod);
  968. }
  969. if (pod == SCE_PL_POD) { // truncate verbatim POD early
  970. sc.SetState(SCE_PL_POD);
  971. } else
  972. fw = fw2;
  973. }
  974. } else {
  975. if (pod == SCE_PL_POD_VERB // still part of current paragraph
  976. && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
  977. pod = SCE_PL_POD;
  978. styler.SetLineState(ln, pod);
  979. } else if (pod == SCE_PL_POD
  980. && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
  981. pod = SCE_PL_POD_VERB;
  982. styler.SetLineState(ln, pod);
  983. }
  984. sc.SetState(pod);
  985. }
  986. sc.ForwardBytes(fw - sc.currentPos); // commit style
  987. }
  988. break;
  989. case SCE_PL_REGEX:
  990. case SCE_PL_STRING_QR:
  991. if (Quote.Rep <= 0) {
  992. if (!setModifiers.Contains(sc.ch))
  993. sc.SetState(SCE_PL_DEFAULT);
  994. } else if (!Quote.Up && !IsASpace(sc.ch)) {
  995. Quote.Open(sc.ch);
  996. } else {
  997. int c, sLen = 0, endType = 0;
  998. while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
  999. // scan to break string into segments
  1000. if (IsASpace(c)) {
  1001. break;
  1002. } else if (c == '\\' && Quote.Up != '\\') {
  1003. endType = 1; break;
  1004. } else if (c == Quote.Down) {
  1005. Quote.Count--;
  1006. if (Quote.Count == 0) {
  1007. Quote.Rep--;
  1008. break;
  1009. }
  1010. } else if (c == Quote.Up)
  1011. Quote.Count++;
  1012. sLen++;
  1013. }
  1014. if (sLen > 0) { // process non-empty segments
  1015. if (Quote.Up != '\'') {
  1016. InterpolateSegment(sc, sLen, true);
  1017. } else // non-interpolated path
  1018. sc.Forward(sLen);
  1019. }
  1020. if (endType == 1)
  1021. sc.Forward();
  1022. }
  1023. break;
  1024. case SCE_PL_REGSUBST:
  1025. case SCE_PL_XLAT:
  1026. if (Quote.Rep <= 0) {
  1027. if (!setModifiers.Contains(sc.ch))
  1028. sc.SetState(SCE_PL_DEFAULT);
  1029. } else if (!Quote.Up && !IsASpace(sc.ch)) {
  1030. Quote.Open(sc.ch);
  1031. } else {
  1032. int c, sLen = 0, endType = 0;
  1033. bool isPattern = (Quote.Rep == 2);
  1034. while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
  1035. // scan to break string into segments
  1036. if (c == '\\' && Quote.Up != '\\') {
  1037. endType = 2; break;
  1038. } else if (Quote.Count == 0 && Quote.Rep == 1) {
  1039. // We matched something like s(...) or tr{...}, Perl 5.10
  1040. // appears to allow almost any character for use as the
  1041. // next delimiters. Whitespace and comments are accepted in
  1042. // between, but we'll limit to whitespace here.
  1043. // For '#', if no whitespace in between, it's a delimiter.
  1044. if (IsASpace(c)) {
  1045. // Keep going
  1046. } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
  1047. endType = 3;
  1048. } else
  1049. Quote.Open(c);
  1050. break;
  1051. } else if (c == Quote.Down) {
  1052. Quote.Count--;
  1053. if (Quote.Count == 0) {
  1054. Quote.Rep--;
  1055. endType = 1;
  1056. }
  1057. if (Quote.Up == Quote.Down)
  1058. Quote.Count++;
  1059. if (endType == 1)
  1060. break;
  1061. } else if (c == Quote.Up) {
  1062. Quote.Count++;
  1063. } else if (IsASpace(c))
  1064. break;
  1065. sLen++;
  1066. }
  1067. if (sLen > 0) { // process non-empty segments
  1068. if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
  1069. InterpolateSegment(sc, sLen, isPattern);
  1070. } else // non-interpolated path
  1071. sc.Forward(sLen);
  1072. }
  1073. if (endType == 2) {
  1074. sc.Forward();
  1075. } else if (endType == 3)
  1076. sc.SetState(SCE_PL_DEFAULT);
  1077. }
  1078. break;
  1079. case SCE_PL_STRING_Q:
  1080. case SCE_PL_STRING_QQ:
  1081. case SCE_PL_STRING_QX:
  1082. case SCE_PL_STRING_QW:
  1083. case SCE_PL_STRING:
  1084. case SCE_PL_CHARACTER:
  1085. case SCE_PL_BACKTICKS:
  1086. if (!Quote.Down && !IsASpace(sc.ch)) {
  1087. Quote.Open(sc.ch);
  1088. } else {
  1089. int c, sLen = 0, endType = 0;
  1090. while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
  1091. // scan to break string into segments
  1092. if (IsASpace(c)) {
  1093. break;
  1094. } else if (c == '\\' && Quote.Up != '\\') {
  1095. endType = 2; break;
  1096. } else if (c == Quote.Down) {
  1097. Quote.Count--;
  1098. if (Quote.Count == 0) {
  1099. endType = 3; break;
  1100. }
  1101. } else if (c == Quote.Up)
  1102. Quote.Count++;
  1103. sLen++;
  1104. }
  1105. if (sLen > 0) { // process non-empty segments
  1106. switch (sc.state) {
  1107. case SCE_PL_STRING:
  1108. case SCE_PL_STRING_QQ:
  1109. case SCE_PL_BACKTICKS:
  1110. InterpolateSegment(sc, sLen);
  1111. break;
  1112. case SCE_PL_STRING_QX:
  1113. if (Quote.Up != '\'') {
  1114. InterpolateSegment(sc, sLen);
  1115. break;
  1116. }
  1117. // (continued for ' delim)
  1118. default: // non-interpolated path
  1119. sc.Forward(sLen);
  1120. }
  1121. }
  1122. if (endType == 2) {
  1123. sc.Forward();
  1124. } else if (endType == 3)
  1125. sc.ForwardSetState(SCE_PL_DEFAULT);
  1126. }
  1127. break;
  1128. case SCE_PL_SUB_PROTOTYPE: {
  1129. int i = 0;
  1130. // forward scan; must all be valid proto characters
  1131. while (setSubPrototype.Contains(sc.GetRelative(i)))
  1132. i++;
  1133. if (sc.GetRelative(i) == ')') { // valid sub prototype
  1134. sc.ForwardBytes(i);
  1135. sc.ForwardSetState(SCE_PL_DEFAULT);
  1136. } else {
  1137. // abandon prototype, restart from '('
  1138. sc.ChangeState(SCE_PL_OPERATOR);
  1139. sc.SetState(SCE_PL_DEFAULT);
  1140. }
  1141. }
  1142. break;
  1143. case SCE_PL_FORMAT: {
  1144. sc.Complete();
  1145. if (sc.Match('.')) {
  1146. sc.Forward();
  1147. if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
  1148. sc.SetState(SCE_PL_DEFAULT);
  1149. }
  1150. while (!sc.atLineEnd)
  1151. sc.Forward();
  1152. }
  1153. break;
  1154. case SCE_PL_ERROR:
  1155. break;
  1156. }
  1157. // Needed for specific continuation styles (one follows the other)
  1158. switch (sc.state) {
  1159. // continued from SCE_PL_WORD
  1160. case SCE_PL_FORMAT_IDENT:
  1161. // occupies HereDoc state 3 to avoid clashing with HERE docs
  1162. if (IsASpaceOrTab(sc.ch)) { // skip whitespace
  1163. sc.ChangeState(SCE_PL_DEFAULT);
  1164. while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
  1165. sc.Forward();
  1166. sc.SetState(SCE_PL_FORMAT_IDENT);
  1167. }
  1168. if (setFormatStart.Contains(sc.ch)) { // identifier or '='
  1169. if (sc.ch != '=') {
  1170. do {
  1171. sc.Forward();
  1172. } while (setFormat.Contains(sc.ch));
  1173. }
  1174. while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
  1175. sc.Forward();
  1176. if (sc.ch == '=') {
  1177. sc.ForwardSetState(SCE_PL_DEFAULT);
  1178. HereDoc.State = 3;
  1179. } else {
  1180. // invalid identifier; inexact fallback, but hey
  1181. sc.ChangeState(SCE_PL_IDENTIFIER);
  1182. sc.SetState(SCE_PL_DEFAULT);
  1183. }
  1184. } else {
  1185. sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
  1186. }
  1187. backFlag = BACK_NONE;
  1188. break;
  1189. }
  1190. // Must check end of HereDoc states here before default state is handled
  1191. if (HereDoc.State == 1 && sc.atLineEnd) {
  1192. // Beginning of here-doc (the line after the here-doc delimiter):
  1193. // Lexically, the here-doc starts from the next line after the <<, but the
  1194. // first line of here-doc seems to follow the style of the last EOL sequence
  1195. int st_new = SCE_PL_HERE_QQ;
  1196. HereDoc.State = 2;
  1197. if (HereDoc.Quoted) {
  1198. if (sc.state == SCE_PL_HERE_DELIM) {
  1199. // Missing quote at end of string! We are stricter than perl.
  1200. // Colour here-doc anyway while marking this bit as an error.
  1201. sc.ChangeState(SCE_PL_ERROR);
  1202. }
  1203. switch (HereDoc.Quote) {
  1204. case '\'':
  1205. st_new = SCE_PL_HERE_Q;
  1206. break;
  1207. case '"' :
  1208. st_new = SCE_PL_HERE_QQ;
  1209. break;
  1210. case '`' :
  1211. st_new = SCE_PL_HERE_QX;
  1212. break;
  1213. }
  1214. } else {
  1215. if (HereDoc.Quote == '\\')
  1216. st_new = SCE_PL_HERE_Q;
  1217. }
  1218. sc.SetState(st_new);
  1219. }
  1220. if (HereDoc.State == 3 && sc.atLineEnd) {
  1221. // Start of format body.
  1222. HereDoc.State = 0;
  1223. sc.SetState(SCE_PL_FORMAT);
  1224. }
  1225. // Determine if a new state should be entered.
  1226. if (sc.state == SCE_PL_DEFAULT) {
  1227. if (IsADigit(sc.ch) ||
  1228. (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
  1229. sc.SetState(SCE_PL_NUMBER);
  1230. backFlag = BACK_NONE;
  1231. numState = PERLNUM_DECIMAL;
  1232. dotCount = 0;
  1233. if (sc.ch == '0') { // hex,bin,octal
  1234. if (sc.chNext == 'x' || sc.chNext == 'X') {
  1235. numState = PERLNUM_HEX;
  1236. } else if (sc.chNext == 'b' || sc.chNext == 'B') {
  1237. numState = PERLNUM_BINARY;
  1238. } else if (IsADigit(sc.chNext)) {
  1239. numState = PERLNUM_OCTAL;
  1240. }
  1241. if (numState != PERLNUM_DECIMAL) {
  1242. sc.Forward();
  1243. }
  1244. } else if (sc.ch == 'v') { // vector
  1245. numState = PERLNUM_V_VECTOR;
  1246. }
  1247. } else if (setWord.Contains(sc.ch)) {
  1248. // if immediately prefixed by '::', always a bareword
  1249. sc.SetState(SCE_PL_WORD);
  1250. if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
  1251. sc.ChangeState(SCE_PL_IDENTIFIER);
  1252. }
  1253. Sci_PositionU bk = sc.currentPos;
  1254. Sci_PositionU fw = sc.currentPos + 1;
  1255. // first check for possible quote-like delimiter
  1256. if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
  1257. sc.ChangeState(SCE_PL_REGSUBST);
  1258. Quote.New(2);
  1259. } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
  1260. sc.ChangeState(SCE_PL_REGEX);
  1261. Quote.New();
  1262. } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
  1263. sc.ChangeState(SCE_PL_STRING_Q);
  1264. Quote.New();
  1265. } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
  1266. sc.ChangeState(SCE_PL_XLAT);
  1267. Quote.New(2);
  1268. } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
  1269. sc.ChangeState(SCE_PL_XLAT);
  1270. Quote.New(2);
  1271. sc.Forward();
  1272. fw++;
  1273. } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
  1274. && !setWord.Contains(sc.GetRelative(2))) {
  1275. if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
  1276. else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
  1277. else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
  1278. else sc.ChangeState(SCE_PL_STRING_QW); // sc.chNext == 'w'
  1279. Quote.New();
  1280. sc.Forward();
  1281. fw++;
  1282. } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
  1283. !setWord.Contains(sc.chNext) ||
  1284. (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
  1285. sc.ChangeState(SCE_PL_OPERATOR);
  1286. }
  1287. // if potentially a keyword, scan forward and grab word, then check
  1288. // if it's really one; if yes, disambiguation test is performed
  1289. // otherwise it is always a bareword and we skip a lot of scanning
  1290. if (sc.state == SCE_PL_WORD) {
  1291. while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
  1292. fw++;
  1293. if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
  1294. sc.ChangeState(SCE_PL_IDENTIFIER);
  1295. }
  1296. }
  1297. // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
  1298. // for quote-like delimiters/keywords, attempt to disambiguate
  1299. // to select for bareword, change state -> SCE_PL_IDENTIFIER
  1300. if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
  1301. if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
  1302. sc.ChangeState(SCE_PL_IDENTIFIER);
  1303. }
  1304. backFlag = BACK_NONE;
  1305. } else if (sc.ch == '#') {
  1306. sc.SetState(SCE_PL_COMMENTLINE);
  1307. } else if (sc.ch == '\"') {
  1308. sc.SetState(SCE_PL_STRING);
  1309. Quote.New();
  1310. Quote.Open(sc.ch);
  1311. backFlag = BACK_NONE;
  1312. } else if (sc.ch == '\'') {
  1313. if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
  1314. // Archaic call
  1315. sc.SetState(SCE_PL_IDENTIFIER);
  1316. } else {
  1317. sc.SetState(SCE_PL_CHARACTER);
  1318. Quote.New();
  1319. Quote.Open(sc.ch);
  1320. }
  1321. backFlag = BACK_NONE;
  1322. } else if (sc.ch == '`') {
  1323. sc.SetState(SCE_PL_BACKTICKS);
  1324. Quote.New();
  1325. Quote.Open(sc.ch);
  1326. backFlag = BACK_NONE;
  1327. } else if (sc.ch == '$') {
  1328. sc.SetState(SCE_PL_SCALAR);
  1329. if (sc.chNext == '{') {
  1330. sc.ForwardSetState(SCE_PL_OPERATOR);
  1331. } else if (IsASpace(sc.chNext)) {
  1332. sc.ForwardSetState(SCE_PL_DEFAULT);
  1333. } else {
  1334. sc.Forward();
  1335. if (sc.Match('`', '`') || sc.Match(':', ':')) {
  1336. sc.Forward();
  1337. }
  1338. }
  1339. backFlag = BACK_NONE;
  1340. } else if (sc.ch == '@') {
  1341. sc.SetState(SCE_PL_ARRAY);
  1342. if (setArray.Contains(sc.chNext)) {
  1343. // no special treatment
  1344. } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
  1345. sc.ForwardBytes(2);
  1346. } else if (sc.chNext == '{' || sc.chNext == '[') {
  1347. sc.ForwardSetState(SCE_PL_OPERATOR);
  1348. } else {
  1349. sc.ChangeState(SCE_PL_OPERATOR);
  1350. }
  1351. backFlag = BACK_NONE;
  1352. } else if (setPreferRE.Contains(sc.ch)) {
  1353. // Explicit backward peeking to set a consistent preferRE for
  1354. // any slash found, so no longer need to track preferRE state.
  1355. // Find first previous significant lexed element and interpret.
  1356. // A few symbols share this code for disambiguation.
  1357. bool preferRE = false;
  1358. bool isHereDoc = sc.Match('<', '<');
  1359. bool hereDocSpace = false; // for: SCALAR [whitespace] '<<'
  1360. Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
  1361. sc.Complete();
  1362. styler.Flush();
  1363. if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
  1364. hereDocSpace = true;
  1365. skipWhitespaceComment(styler, bk);
  1366. if (bk == 0) {
  1367. // avoid backward scanning breakage
  1368. preferRE = true;
  1369. } else {
  1370. int bkstyle = styler.StyleAt(bk);
  1371. int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
  1372. switch (bkstyle) {
  1373. case SCE_PL_OPERATOR:
  1374. preferRE = true;
  1375. if (bkch == ')' || bkch == ']') {
  1376. preferRE = false;
  1377. } else if (bkch == '}') {
  1378. // backtrack by counting balanced brace pairs
  1379. // needed to test for variables like ${}, @{} etc.
  1380. bkstyle = styleBeforeBracePair(styler, bk);
  1381. if (bkstyle == SCE_PL_SCALAR
  1382. || bkstyle == SCE_PL_ARRAY
  1383. || bkstyle == SCE_PL_HASH
  1384. || bkstyle == SCE_PL_SYMBOLTABLE
  1385. || bkstyle == SCE_PL_OPERATOR) {
  1386. preferRE = false;
  1387. }
  1388. } else if (bkch == '+' || bkch == '-') {
  1389. if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
  1390. && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
  1391. // exceptions for operators: unary suffixes ++, --
  1392. preferRE = false;
  1393. }
  1394. break;
  1395. case SCE_PL_IDENTIFIER:
  1396. preferRE = true;
  1397. bkstyle = styleCheckIdentifier(styler, bk);
  1398. if ((bkstyle == 1) || (bkstyle == 2)) {
  1399. // inputsymbol or var with "->" or "::" before identifier
  1400. preferRE = false;
  1401. } else if (bkstyle == 3) {
  1402. // bare identifier, test cases follows:
  1403. if (sc.ch == '/') {
  1404. // if '/', /PATTERN/ unless digit/space immediately after '/'
  1405. // if '//', always expect defined-or operator to follow identifier
  1406. if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
  1407. preferRE = false;
  1408. } else if (sc.ch == '*' || sc.ch == '%') {
  1409. if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
  1410. preferRE = false;
  1411. } else if (sc.ch == '<') {
  1412. if (IsASpace(sc.chNext) || sc.chNext == '=')
  1413. preferRE = false;
  1414. }
  1415. }
  1416. break;
  1417. case SCE_PL_SCALAR: // for $var<< case:
  1418. if (isHereDoc && hereDocSpace) // if SCALAR whitespace '<<', *always* a HERE doc
  1419. preferRE = true;
  1420. break;
  1421. case SCE_PL_WORD:
  1422. preferRE = true;
  1423. // for HERE docs, always true
  1424. if (sc.ch == '/') {
  1425. // adopt heuristics similar to vim-style rules:
  1426. // keywords always forced as /PATTERN/: split, if, elsif, while
  1427. // everything else /PATTERN/ unless digit/space immediately after '/'
  1428. // for '//', defined-or favoured unless special keywords
  1429. Sci_PositionU bkend = bk + 1;
  1430. while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
  1431. bk--;
  1432. }
  1433. if (isPerlKeyword(bk, bkend, reWords, styler))
  1434. break;
  1435. if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
  1436. preferRE = false;
  1437. } else if (sc.ch == '*' || sc.ch == '%') {
  1438. if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
  1439. preferRE = false;
  1440. } else if (sc.ch == '<') {
  1441. if (IsASpace(sc.chNext) || sc.chNext == '=')
  1442. preferRE = false;
  1443. }
  1444. break;
  1445. // other styles uses the default, preferRE=false
  1446. case SCE_PL_POD:
  1447. case SCE_PL_HERE_Q:
  1448. case SCE_PL_HERE_QQ:
  1449. case SCE_PL_HERE_QX:
  1450. preferRE = true;
  1451. break;
  1452. }
  1453. }
  1454. backFlag = BACK_NONE;
  1455. if (isHereDoc) { // handle '<<', HERE doc
  1456. if (sc.Match("<<>>")) { // double-diamond operator (5.22)
  1457. sc.SetState(SCE_PL_OPERATOR);
  1458. sc.Forward(3);
  1459. } else if (preferRE) {
  1460. sc.SetState(SCE_PL_HERE_DELIM);
  1461. HereDoc.State = 0;
  1462. } else { // << operator
  1463. sc.SetState(SCE_PL_OPERATOR);
  1464. sc.Forward();
  1465. }
  1466. } else if (sc.ch == '*') { // handle '*', typeglob
  1467. if (preferRE) {
  1468. sc.SetState(SCE_PL_SYMBOLTABLE);
  1469. if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
  1470. sc.ForwardBytes(2);
  1471. } else if (sc.chNext == '{') {
  1472. sc.ForwardSetState(SCE_PL_OPERATOR);
  1473. } else {
  1474. sc.Forward();
  1475. }
  1476. } else {
  1477. sc.SetState(SCE_PL_OPERATOR);
  1478. if (sc.chNext == '*') // exponentiation
  1479. sc.Forward();
  1480. }
  1481. } else if (sc.ch == '%') { // handle '%', hash
  1482. if (preferRE) {
  1483. sc.SetState(SCE_PL_HASH);
  1484. if (setHash.Contains(sc.chNext)) {
  1485. sc.Forward();
  1486. } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
  1487. sc.ForwardBytes(2);
  1488. } else if (sc.chNext == '{') {
  1489. sc.ForwardSetState(SCE_PL_OPERATOR);
  1490. } else {
  1491. sc.ChangeState(SCE_PL_OPERATOR);
  1492. }
  1493. } else {
  1494. sc.SetState(SCE_PL_OPERATOR);
  1495. }
  1496. } else if (sc.ch == '<') { // handle '<', inputsymbol
  1497. if (preferRE) {
  1498. // forward scan
  1499. int i = InputSymbolScan(sc);
  1500. if (i > 0) {
  1501. sc.SetState(SCE_PL_IDENTIFIER);
  1502. sc.Forward(i);
  1503. } else {
  1504. sc.SetState(SCE_PL_OPERATOR);
  1505. }
  1506. } else {
  1507. sc.SetState(SCE_PL_OPERATOR);
  1508. }
  1509. } else { // handle '/', regexp
  1510. if (preferRE) {
  1511. sc.SetState(SCE_PL_REGEX);
  1512. Quote.New();
  1513. Quote.Open(sc.ch);
  1514. } else { // / and // operators
  1515. sc.SetState(SCE_PL_OPERATOR);
  1516. if (sc.chNext == '/') {
  1517. sc.Forward();
  1518. }
  1519. }
  1520. }
  1521. } else if (sc.ch == '=' // POD
  1522. && setPOD.Contains(sc.chNext)
  1523. && sc.atLineStart) {
  1524. sc.SetState(SCE_PL_POD);
  1525. backFlag = BACK_NONE;
  1526. } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) { // extended '-' cases
  1527. Sci_PositionU bk = sc.currentPos;
  1528. Sci_PositionU fw = 2;
  1529. if (setSingleCharOp.Contains(sc.chNext) && // file test operators
  1530. !setWord.Contains(sc.GetRelative(2))) {
  1531. sc.SetState(SCE_PL_WORD);
  1532. } else {
  1533. // nominally a minus and bareword; find extent of bareword
  1534. while (setWord.Contains(sc.GetRelative(fw)))
  1535. fw++;
  1536. sc.SetState(SCE_PL_OPERATOR);
  1537. }
  1538. // force to bareword for hash key => or {variable literal} cases
  1539. if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
  1540. sc.ChangeState(SCE_PL_IDENTIFIER);
  1541. }
  1542. backFlag = BACK_NONE;
  1543. } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
  1544. sc.Complete();
  1545. if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
  1546. sc.SetState(SCE_PL_SUB_PROTOTYPE);
  1547. backFlag = BACK_NONE;
  1548. } else {
  1549. sc.SetState(SCE_PL_OPERATOR);
  1550. }
  1551. } else if (setPerlOperator.Contains(sc.ch)) { // operators
  1552. sc.SetState(SCE_PL_OPERATOR);
  1553. if (sc.Match('.', '.')) { // .. and ...
  1554. sc.Forward();
  1555. if (sc.chNext == '.') sc.Forward();
  1556. }
  1557. } else if (sc.ch == 4 || sc.ch == 26) { // ^D and ^Z ends valid perl source
  1558. sc.SetState(SCE_PL_DATASECTION);
  1559. } else {
  1560. // keep colouring defaults
  1561. sc.Complete();
  1562. }
  1563. }
  1564. }
  1565. sc.Complete();
  1566. if (sc.state == SCE_PL_HERE_Q
  1567. || sc.state == SCE_PL_HERE_QQ
  1568. || sc.state == SCE_PL_HERE_QX
  1569. || sc.state == SCE_PL_FORMAT) {
  1570. styler.ChangeLexerState(sc.currentPos, styler.Length());
  1571. }
  1572. sc.Complete();
  1573. }
  1574. #define PERL_HEADFOLD_SHIFT 4
  1575. #define PERL_HEADFOLD_MASK 0xF0
  1576. void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
  1577. if (!options.fold)
  1578. return;
  1579. LexAccessor styler(pAccess);
  1580. Sci_PositionU endPos = startPos + length;
  1581. int visibleChars = 0;
  1582. Sci_Position lineCurrent = styler.GetLine(startPos);
  1583. // Backtrack to previous line in case need to fix its fold status
  1584. if (startPos > 0) {
  1585. if (lineCurrent > 0) {
  1586. lineCurrent--;
  1587. startPos = styler.LineStart(lineCurrent);
  1588. }
  1589. }
  1590. int levelPrev = SC_FOLDLEVELBASE;
  1591. if (lineCurrent > 0)
  1592. levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
  1593. int levelCurrent = levelPrev;
  1594. char chNext = styler[startPos];
  1595. char chPrev = styler.SafeGetCharAt(startPos - 1);
  1596. int styleNext = styler.StyleAt(startPos);
  1597. // Used at end of line to determine if the line was a package definition
  1598. bool isPackageLine = false;
  1599. int podHeading = 0;
  1600. for (Sci_PositionU i = startPos; i < endPos; i++) {
  1601. char ch = chNext;
  1602. chNext = styler.SafeGetCharAt(i + 1);
  1603. int style = styleNext;
  1604. styleNext = styler.StyleAt(i + 1);
  1605. int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
  1606. bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
  1607. bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
  1608. // Comment folding
  1609. if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
  1610. if (!IsCommentLine(lineCurrent - 1, styler)
  1611. && IsCommentLine(lineCurrent + 1, styler))
  1612. levelCurrent++;
  1613. else if (IsCommentLine(lineCurrent - 1, styler)
  1614. && !IsCommentLine(lineCurrent + 1, styler))
  1615. levelCurrent--;
  1616. }
  1617. // {} [] block folding
  1618. if (style == SCE_PL_OPERATOR) {
  1619. if (ch == '{') {
  1620. if (options.foldAtElse && levelCurrent < levelPrev)
  1621. --levelPrev;
  1622. levelCurrent++;
  1623. } else if (ch == '}') {
  1624. levelCurrent--;
  1625. }
  1626. if (ch == '[') {
  1627. if (options.foldAtElse && levelCurrent < levelPrev)
  1628. --levelPrev;
  1629. levelCurrent++;
  1630. } else if (ch == ']') {
  1631. levelCurrent--;
  1632. }
  1633. }
  1634. // POD folding
  1635. if (options.foldPOD && atLineStart) {
  1636. if (style == SCE_PL_POD) {
  1637. if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
  1638. levelCurrent++;
  1639. else if (styler.Match(i, "=cut"))
  1640. levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
  1641. else if (styler.Match(i, "=head"))
  1642. podHeading = PodHeadingLevel(i, styler);
  1643. } else if (style == SCE_PL_DATASECTION) {
  1644. if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
  1645. levelCurrent++;
  1646. else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
  1647. levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
  1648. else if (styler.Match(i, "=head"))
  1649. podHeading = PodHeadingLevel(i, styler);
  1650. // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
  1651. // reset needed as level test is vs. SC_FOLDLEVELBASE
  1652. else if (stylePrevCh != SCE_PL_DATASECTION)
  1653. levelCurrent = SC_FOLDLEVELBASE;
  1654. }
  1655. }
  1656. // package folding
  1657. if (options.foldPackage && atLineStart) {
  1658. if (IsPackageLine(lineCurrent, styler)
  1659. && !IsPackageLine(lineCurrent + 1, styler))
  1660. isPackageLine = true;
  1661. }
  1662. //heredoc folding
  1663. switch (style) {
  1664. case SCE_PL_HERE_QQ :
  1665. case SCE_PL_HERE_Q :
  1666. case SCE_PL_HERE_QX :
  1667. switch (stylePrevCh) {
  1668. case SCE_PL_HERE_QQ :
  1669. case SCE_PL_HERE_Q :
  1670. case SCE_PL_HERE_QX :
  1671. //do nothing;
  1672. break;
  1673. default :
  1674. levelCurrent++;
  1675. break;
  1676. }
  1677. break;
  1678. default:
  1679. switch (stylePrevCh) {
  1680. case SCE_PL_HERE_QQ :
  1681. case SCE_PL_HERE_Q :
  1682. case SCE_PL_HERE_QX :
  1683. levelCurrent--;
  1684. break;
  1685. default :
  1686. //do nothing;
  1687. break;
  1688. }
  1689. break;
  1690. }
  1691. //explicit folding
  1692. if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
  1693. if (chNext == '{') {
  1694. levelCurrent++;
  1695. } else if (levelCurrent > SC_FOLDLEVELBASE && chNext == '}') {
  1696. levelCurrent--;
  1697. }
  1698. }
  1699. if (atEOL) {
  1700. int lev = levelPrev;
  1701. // POD headings occupy bits 7-4, leaving some breathing room for
  1702. // non-standard practice -- POD sections stuck in blocks, etc.
  1703. if (podHeading > 0) {
  1704. levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
  1705. lev = levelCurrent - 1;
  1706. lev |= SC_FOLDLEVELHEADERFLAG;
  1707. podHeading = 0;
  1708. }
  1709. // Check if line was a package declaration
  1710. // because packages need "special" treatment
  1711. if (isPackageLine) {
  1712. lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
  1713. levelCurrent = SC_FOLDLEVELBASE + 1;
  1714. isPackageLine = false;
  1715. }
  1716. lev |= levelCurrent << 16;
  1717. if (visibleChars == 0 && options.foldCompact)
  1718. lev |= SC_FOLDLEVELWHITEFLAG;
  1719. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  1720. lev |= SC_FOLDLEVELHEADERFLAG;
  1721. if (lev != styler.LevelAt(lineCurrent)) {
  1722. styler.SetLevel(lineCurrent, lev);
  1723. }
  1724. lineCurrent++;
  1725. levelPrev = levelCurrent;
  1726. visibleChars = 0;
  1727. }
  1728. if (!isspacechar(ch))
  1729. visibleChars++;
  1730. chPrev = ch;
  1731. }
  1732. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  1733. int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
  1734. styler.SetLevel(lineCurrent, levelPrev | flagsNext);
  1735. }
  1736. LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);