CharacterSet.h 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  // Scintilla source code edit control
  /** @file CharacterSet.h
   ** Encapsulates a set of characters. Used to test if a character is within a set.
   **/
  // Copyright 2007 by Neil Hodgson <neilh@scintilla.org>
  // The License.txt file describes the conditions under which this software may be distributed.
  7. #ifndef CHARACTERSET_H
  8. #define CHARACTERSET_H
  9. #ifdef SCI_NAMESPACE
  10. namespace Scintilla {
  11. #endif
  /**
   * A set of character values stored as a heap-allocated array of flags.
   *
   * Values in [0, size) are looked up in the array, values at or above size
   * are reported as valueAfter, and negative values are never members.
   */
  class CharacterSet {
      int size;
      bool valueAfter;
      bool *bset;
  public:
      /** Bit flags selecting ASCII ranges that the constructor adds to the set. */
      enum setBase {
          setNone=0,
          setLower=1,
          setUpper=2,
          setDigits=4,
          setAlpha=setLower|setUpper,
          setAlphaNum=setAlpha|setDigits
      };

      /**
       * Build a set.
       * @param base        ASCII ranges (lower case, upper case, digits) to include.
       * @param initialSet  NUL-terminated string of extra members; may be null.
       * @param size_       Number of tracked values; non-positive is treated as 0.
       * @param valueAfter_ Result of Contains() for values >= size_.
       */
      CharacterSet(setBase base=setNone, const char *initialSet="", int size_=0x80, bool valueAfter_=false) {
          // A negative element count is not a valid array bound, so clamp it.
          size = (size_ > 0) ? size_ : 0;
          valueAfter = valueAfter_;
          bset = new bool[size];
          for (int i=0; i < size; i++) {
              bset[i] = false;
          }
          AddString(initialSet);
          if (base & setLower)
              AddString("abcdefghijklmnopqrstuvwxyz");
          if (base & setUpper)
              AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
          if (base & setDigits)
              AddString("0123456789");
      }

      /** Deep copy: the new object gets its own flag array. */
      CharacterSet(const CharacterSet &other) {
          size = other.size;
          valueAfter = other.valueAfter;
          bset = new bool[size];
          for (int i=0; i < size; i++) {
              bset[i] = other.bset[i];
          }
      }

      ~CharacterSet() {
          delete []bset;
          bset = 0;
          size = 0;
      }

      /** Deep-copy assignment; self-assignment is a no-op. */
      CharacterSet &operator=(const CharacterSet &other) {
          if (this != &other) {
              // Allocate and fill the new array before releasing the old one so
              // that a failed allocation leaves this object untouched.
              bool *bsetNew = new bool[other.size];
              for (int i=0; i < other.size; i++) {
                  bsetNew[i] = other.bset[i];
              }
              delete []bset;
              size = other.size;
              valueAfter = other.valueAfter;
              bset = bsetNew;
          }
          return *this;
      }

      /** Add val to the set; val must be in [0, size). */
      void Add(int val) {
          assert(val >= 0);
          assert(val < size);
          // The asserts disappear when NDEBUG is defined; never write outside the array.
          if (val < 0 || val >= size)
              return;
          bset[val] = true;
      }

      /** Add every character of a NUL-terminated string, each taken as an unsigned char. */
      void AddString(const char *setToAdd) {
          if (!setToAdd)
              return;
          for (const char *cp=setToAdd; *cp; cp++) {
              Add(static_cast<unsigned char>(*cp));
          }
      }

      /** Return whether val is a member; see the class comment for out-of-range values. */
      bool Contains(int val) const {
          // val being -ve is valid (or there is a sign extension bug elsewhere).
          //assert(val >= 0);
          if (val < 0) return false;
          return (val < size) ? bset[val] : valueAfter;
      }
  };
  // Functions for classifying characters
  /** Return true for an ASCII space or any character in the range 0x09..0x0d. */
  inline bool IsASpace(int ch) {
      return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
  }
  /** Return true only for an ASCII space or a horizontal tab. */
  inline bool IsASpaceOrTab(int ch) {
      return (ch == ' ') || (ch == '\t');
  }
  /** Return true when ch is an ASCII decimal digit '0'..'9'. */
  inline bool IsADigit(int ch) {
      return (ch >= '0') && (ch <= '9');
  }
  /**
   * Return true when ch is a valid ASCII digit in the given number base.
   *
   * For bases up to 10 only '0' .. '0'+base-1 are accepted. For larger bases
   * the decimal digits plus the first (base - 10) letters, in either case, are
   * accepted. Letters never extend past 'Z'/'z', so bases above 36 behave like
   * base 36. A base of 0 or less accepts nothing.
   */
  inline bool IsADigit(int ch, int base) {
      if (base <= 10) {
          return (ch >= '0') && (ch - '0' < base);
      } else {
          // Compare offsets instead of computing 'A' + base - 10, which could
          // overflow for very large bases.
          const int letters = base - 10;
          return ((ch >= '0') && (ch <= '9')) ||
              ((ch >= 'A') && (ch <= 'Z') && (ch - 'A' < letters)) ||
              ((ch >= 'a') && (ch <= 'z') && (ch - 'a' < letters));
      }
  }
  /** Return true when ch lies in the 7-bit ASCII range 0..0x7f. */
  inline bool IsASCII(int ch) {
      return (ch >= 0) && (ch < 0x80);
  }
  /** Return true when ch is an ASCII lower-case letter 'a'..'z'. */
  inline bool IsLowerCase(int ch) {
      return (ch >= 'a') && (ch <= 'z');
  }
  /** Return true when ch is an ASCII upper-case letter 'A'..'Z'. */
  inline bool IsUpperCase(int ch) {
      return (ch >= 'A') && (ch <= 'Z');
  }
  /** Return true when ch is an ASCII letter (either case) or decimal digit. */
  inline bool IsAlphaNumeric(int ch) {
      return
          ((ch >= '0') && (ch <= '9')) ||
          ((ch >= 'a') && (ch <= 'z')) ||
          ((ch >= 'A') && (ch <= 'Z'));
  }
  /**
   * Check if a character is a space.
   * Accepts ' ' and the range 0x09..0x0d.
   * This is ASCII specific but is safe with chars >= 0x80.
   */
  inline bool isspacechar(int ch) {
      return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
  }
  127. inline bool iswordchar(int ch) {
  128. return IsAlphaNumeric(ch) || ch == '.' || ch == '_';
  129. }
  130. inline bool iswordstart(int ch) {
  131. return IsAlphaNumeric(ch) || ch == '_';
  132. }
  /**
   * Return true when ch is one of the ASCII punctuation characters treated as
   * an operator:  % ^ & * ( ) - + = | { } [ ] : ; < > , / ? ! . ~
   * Every other value, including all letters and digits, yields false.
   */
  inline bool isoperator(int ch) {
      switch (ch) {
      case '%': case '^': case '&': case '*':
      case '(': case ')': case '-': case '+':
      case '=': case '|': case '{': case '}':
      case '[': case ']': case ':': case ';':
      case '<': case '>': case ',': case '/':
      case '?': case '!': case '.': case '~':
          return true;
      default:
          return false;
      }
  }
  // Simple case functions for ASCII.
  /**
   * Convert an ASCII lower-case letter to upper case.
   * Any value outside 'a'..'z' is returned unchanged.
   */
  inline int MakeUpperCase(int ch) {
      if (ch < 'a' || ch > 'z')
          return ch;
      else
          return ch - 'a' + 'A';
  }
  /**
   * Convert an ASCII upper-case letter to lower case.
   * Any value outside 'A'..'Z' is returned unchanged.
   */
  inline int MakeLowerCase(int ch) {
      if (ch < 'A' || ch > 'Z')
          return ch;
      else
          return ch - 'A' + 'a';
  }
  // String comparison helpers; only declared here, the definitions are not in this header.
  // NOTE(review): presumably strcmp/strncmp-style results ignoring ASCII case,
  // with len limiting the number of characters compared - confirm against the implementation.
  int CompareCaseInsensitive(const char *a, const char *b);
  int CompareNCaseInsensitive(const char *a, const char *b, size_t len);
  160. #ifdef SCI_NAMESPACE
  161. }
  162. #endif
  163. #endif