Coverage Summary for Class: CharMatcher (com.google.common.base)

Class Method, % Line, %
CharMatcher 7.8% (4/51) 5% (9/180)
CharMatcher$1 0% (0/2) 0% (0/2)
CharMatcher$And 0% (0/4) 0% (0/13)
CharMatcher$Any 0% (0/17) 0% (0/29)
CharMatcher$AnyOf 0% (0/4) 0% (0/13)
CharMatcher$Ascii 0% (0/3) 0% (0/4)
CharMatcher$BitSetMatcher 0% (0/4) 0% (0/9)
CharMatcher$BreakingWhitespace 0% (0/4) 0% (0/7)
CharMatcher$Digit 0% (0/4) 0% (0/8)
CharMatcher$FastMatcher 33.3% (1/3) 33.3% (1/3)
CharMatcher$ForPredicate 0% (0/4) 0% (0/6)
CharMatcher$InRange 0% (0/4) 0% (0/11)
CharMatcher$Invisible 0% (0/2) 0% (0/3)
CharMatcher$Is 25% (2/8) 36.4% (4/11)
CharMatcher$IsEither 0% (0/4) 0% (0/9)
CharMatcher$IsNot 0% (0/7) 0% (0/11)
CharMatcher$JavaDigit 0% (0/4) 0% (0/4)
CharMatcher$JavaIsoControl 0% (0/3) 0% (0/4)
CharMatcher$JavaLetter 0% (0/4) 0% (0/4)
CharMatcher$JavaLetterOrDigit 0% (0/4) 0% (0/4)
CharMatcher$JavaLowerCase 0% (0/4) 0% (0/4)
CharMatcher$JavaUpperCase 0% (0/4) 0% (0/4)
CharMatcher$NamedFastMatcher 50% (1/2) 75% (3/4)
CharMatcher$Negated 0% (0/8) 0% (0/14)
CharMatcher$NegatedFastMatcher 0% (0/2) 0% (0/3)
CharMatcher$None 15.8% (3/19) 14.3% (4/28)
CharMatcher$Or 0% (0/4) 0% (0/9)
CharMatcher$RangesMatcher 0% (0/3) 0% (0/16)
CharMatcher$SingleWidth 0% (0/2) 0% (0/5)
CharMatcher$Whitespace 0% (0/4) 0% (0/8)
Total 5.7% (11/192) 4.9% (21/430)


1 /* 2  * Copyright (C) 2008 The Guava Authors 3  * 4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5  * in compliance with the License. You may obtain a copy of the License at 6  * 7  * http://www.apache.org/licenses/LICENSE-2.0 8  * 9  * Unless required by applicable law or agreed to in writing, software distributed under the License 10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11  * or implied. See the License for the specific language governing permissions and limitations under 12  * the License. 13  */ 14  15 package com.google.common.base; 16  17 import static com.google.common.base.Preconditions.checkArgument; 18 import static com.google.common.base.Preconditions.checkNotNull; 19 import static com.google.common.base.Preconditions.checkPositionIndex; 20  21 import com.google.common.annotations.GwtCompatible; 22 import com.google.common.annotations.GwtIncompatible; 23 import com.google.common.annotations.VisibleForTesting; 24 import java.util.Arrays; 25 import java.util.BitSet; 26  27 /** 28  * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does 29  * for any {@link Object}. Also offers basic text processing methods based on this function. 30  * Implementations are strongly encouraged to be side-effect-free and immutable. 31  * 32  * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean 33  * "any {@code char} value {@code c} for which {@code this.matches(c)} returns {@code true}". 34  * 35  * <p><b>Warning:</b> This class deals only with {@code char} values, that is, <a 36  * href="http://www.unicode.org/glossary/#BMP_character">BMP characters</a>. It does not understand 37  * <a href="http://www.unicode.org/glossary/#supplementary_code_point">supplementary Unicode code 38  * points</a> in the range {@code 0x10000} to {@code 0x10FFFF} which includes the majority of 39  * assigned characters, including important CJK characters and emoji. 40  * 41  * <p>Supplementary characters are <a 42  * href="https://docs.oracle.com/javase/8/docs/api/java/lang/Character.html#supplementary">encoded 43  * into a {@code String} using surrogate pairs</a>, and a {@code CharMatcher} treats these just as 44  * two separate characters. {@link #countIn} counts each supplementary character as 2 {@code char}s. 45  * 46  * <p>For up-to-date Unicode character properties (digit, letter, etc.) and support for 47  * supplementary code points, use ICU4J UCharacter and UnicodeSet (freeze() after building). For 48  * basic text processing based on UnicodeSet use the ICU4J UnicodeSetSpanner. 49  * 50  * <p>Example usages: 51  * 52  * <pre> 53  * String trimmed = {@link #whitespace() whitespace()}.{@link #trimFrom trimFrom}(userInput); 54  * if ({@link #ascii() ascii()}.{@link #matchesAllOf matchesAllOf}(s)) { ... }</pre> 55  * 56  * <p>See the Guava User Guide article on <a 57  * href="https://github.com/google/guava/wiki/StringsExplained#charmatcher">{@code CharMatcher} 58  * </a>. 59  * 60  * @author Kevin Bourrillion 61  * @since 1.0 62  */ 63 @GwtCompatible(emulated = true) 64 @ElementTypesAreNonnullByDefault 65 public abstract class CharMatcher implements Predicate<Character> { 66  /* 67  * N777777777NO 68  * N7777777777777N 69  * M777777777777777N 70  * $N877777777D77777M 71  * N M77777777ONND777M 72  * MN777777777NN D777 73  * N7ZN777777777NN ~M7778 74  * N777777777777MMNN88777N 75  * N777777777777MNZZZ7777O 76  * DZN7777O77777777777777 77  * N7OONND7777777D77777N 78  * 8$M++++?N???$77777$ 79  * M7++++N+M77777777N 80  * N77O777777777777$ M 81  * DNNM$$$$777777N D 82  * N$N:=N$777N7777M NZ 83  * 77Z::::N777777777 ODZZZ 84  * 77N::::::N77777777M NNZZZ$ 85  * $777:::::::77777777MN ZM8ZZZZZ 86  * 777M::::::Z7777777Z77 N++ZZZZNN 87  * 7777M:::::M7777777$777M $++IZZZZM 88  * M777$:::::N777777$M7777M +++++ZZZDN 89  * NN$::::::7777$$M777777N N+++ZZZZNZ 90  * N::::::N:7$O:77777777 N++++ZZZZN 91  * M::::::::::::N77777777+ +?+++++ZZZM 92  * 8::::::::::::D77777777M O+++++ZZ 93  * ::::::::::::M777777777N O+?D 94  * M:::::::::::M77777777778 77= 95  * D=::::::::::N7777777777N 777 96  * INN===::::::=77777777777N I777N 97  * ?777N========N7777777777787M N7777 98  * 77777$D======N77777777777N777N? N777777 99  * I77777$$$N7===M$$77777777$77777777$MMZ77777777N 100  * $$$$$$$$$$$NIZN$$$$$$$$$M$$7777777777777777ON 101  * M$$$$$$$$M M$$$$$$$$N=N$$$$7777777$$$ND 102  * O77Z$$$$$$$ M$$$$$$$$MNI==$DNNNNM=~N 103  * 7 :N MNN$$$$M$ $$$777$8 8D8I 104  * NMM.:7O 777777778 105  * 7777777MN 106  * M NO .7: 107  * M : M 108  * 8 109  */ 110  111  // Constant matcher factory methods 112  113  /** 114  * Matches any character. 115  * 116  * @since 19.0 (since 1.0 as constant {@code ANY}) 117  */ 118  public static CharMatcher any() { 119  return Any.INSTANCE; 120  } 121  122  /** 123  * Matches no characters. 124  * 125  * @since 19.0 (since 1.0 as constant {@code NONE}) 126  */ 127  public static CharMatcher none() { 128  return None.INSTANCE; 129  } 130  131  /** 132  * Determines whether a character is whitespace according to the latest Unicode standard, as 133  * illustrated <a 134  * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>. 135  * This is not the same definition used by other Java APIs. (See a <a 136  * href="https://goo.gl/Y6SLWx">comparison of several definitions of "whitespace"</a>.) 137  * 138  * <p>All Unicode White_Space characters are on the BMP and thus supported by this API. 139  * 140  * <p><b>Note:</b> as the Unicode definition evolves, we will modify this matcher to keep it up to 141  * date. 142  * 143  * @since 19.0 (since 1.0 as constant {@code WHITESPACE}) 144  */ 145  public static CharMatcher whitespace() { 146  return Whitespace.INSTANCE; 147  } 148  149  /** 150  * Determines whether a character is a breaking whitespace (that is, a whitespace which can be 151  * interpreted as a break between words for formatting purposes). See {@link #whitespace()} for a 152  * discussion of that term. 153  * 154  * @since 19.0 (since 2.0 as constant {@code BREAKING_WHITESPACE}) 155  */ 156  public static CharMatcher breakingWhitespace() { 157  return BreakingWhitespace.INSTANCE; 158  } 159  160  /** 161  * Determines whether a character is ASCII, meaning that its code point is less than 128. 162  * 163  * @since 19.0 (since 1.0 as constant {@code ASCII}) 164  */ 165  public static CharMatcher ascii() { 166  return Ascii.INSTANCE; 167  } 168  169  /** 170  * Determines whether a character is a BMP digit according to <a 171  * href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>. If 172  * you only care to match ASCII digits, you can use {@code inRange('0', '9')}. 173  * 174  * @deprecated Many digits are supplementary characters; see the class documentation. 175  * @since 19.0 (since 1.0 as constant {@code DIGIT}) 176  */ 177  @Deprecated 178  public static CharMatcher digit() { 179  return Digit.INSTANCE; 180  } 181  182  /** 183  * Determines whether a character is a BMP digit according to {@linkplain Character#isDigit(char) 184  * Java's definition}. If you only care to match ASCII digits, you can use {@code inRange('0', 185  * '9')}. 186  * 187  * @deprecated Many digits are supplementary characters; see the class documentation. 188  * @since 19.0 (since 1.0 as constant {@code JAVA_DIGIT}) 189  */ 190  @Deprecated 191  public static CharMatcher javaDigit() { 192  return JavaDigit.INSTANCE; 193  } 194  195  /** 196  * Determines whether a character is a BMP letter according to {@linkplain 197  * Character#isLetter(char) Java's definition}. If you only care to match letters of the Latin 198  * alphabet, you can use {@code inRange('a', 'z').or(inRange('A', 'Z'))}. 199  * 200  * @deprecated Most letters are supplementary characters; see the class documentation. 201  * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER}) 202  */ 203  @Deprecated 204  public static CharMatcher javaLetter() { 205  return JavaLetter.INSTANCE; 206  } 207  208  /** 209  * Determines whether a character is a BMP letter or digit according to {@linkplain 210  * Character#isLetterOrDigit(char) Java's definition}. 211  * 212  * @deprecated Most letters and digits are supplementary characters; see the class documentation. 213  * @since 19.0 (since 1.0 as constant {@code JAVA_LETTER_OR_DIGIT}). 214  */ 215  @Deprecated 216  public static CharMatcher javaLetterOrDigit() { 217  return JavaLetterOrDigit.INSTANCE; 218  } 219  220  /** 221  * Determines whether a BMP character is upper case according to {@linkplain 222  * Character#isUpperCase(char) Java's definition}. 223  * 224  * @deprecated Some uppercase characters are supplementary characters; see the class 225  * documentation. 226  * @since 19.0 (since 1.0 as constant {@code JAVA_UPPER_CASE}) 227  */ 228  @Deprecated 229  public static CharMatcher javaUpperCase() { 230  return JavaUpperCase.INSTANCE; 231  } 232  233  /** 234  * Determines whether a BMP character is lower case according to {@linkplain 235  * Character#isLowerCase(char) Java's definition}. 236  * 237  * @deprecated Some lowercase characters are supplementary characters; see the class 238  * documentation. 239  * @since 19.0 (since 1.0 as constant {@code JAVA_LOWER_CASE}) 240  */ 241  @Deprecated 242  public static CharMatcher javaLowerCase() { 243  return JavaLowerCase.INSTANCE; 244  } 245  246  /** 247  * Determines whether a character is an ISO control character as specified by {@link 248  * Character#isISOControl(char)}. 249  * 250  * <p>All ISO control codes are on the BMP and thus supported by this API. 251  * 252  * @since 19.0 (since 1.0 as constant {@code JAVA_ISO_CONTROL}) 253  */ 254  public static CharMatcher javaIsoControl() { 255  return JavaIsoControl.INSTANCE; 256  } 257  258  /** 259  * Determines whether a character is invisible; that is, if its Unicode category is any of 260  * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and 261  * PRIVATE_USE according to ICU4J. 262  * 263  * <p>See also the Unicode Default_Ignorable_Code_Point property (available via ICU). 264  * 265  * @deprecated Most invisible characters are supplementary characters; see the class 266  * documentation. 267  * @since 19.0 (since 1.0 as constant {@code INVISIBLE}) 268  */ 269  @Deprecated 270  public static CharMatcher invisible() { 271  return Invisible.INSTANCE; 272  } 273  274  /** 275  * Determines whether a character is single-width (not double-width). When in doubt, this matcher 276  * errs on the side of returning {@code false} (that is, it tends to assume a character is 277  * double-width). 278  * 279  * <p><b>Note:</b> as the reference file evolves, we will modify this matcher to keep it up to 280  * date. 281  * 282  * <p>See also <a href="http://www.unicode.org/reports/tr11/">UAX #11 East Asian Width</a>. 283  * 284  * @deprecated Many such characters are supplementary characters; see the class documentation. 285  * @since 19.0 (since 1.0 as constant {@code SINGLE_WIDTH}) 286  */ 287  @Deprecated 288  public static CharMatcher singleWidth() { 289  return SingleWidth.INSTANCE; 290  } 291  292  // Static factories 293  294  /** Returns a {@code char} matcher that matches only one specified BMP character. */ 295  public static CharMatcher is(final char match) { 296  return new Is(match); 297  } 298  299  /** 300  * Returns a {@code char} matcher that matches any character except the BMP character specified. 301  * 302  * <p>To negate another {@code CharMatcher}, use {@link #negate()}. 303  */ 304  public static CharMatcher isNot(final char match) { 305  return new IsNot(match); 306  } 307  308  /** 309  * Returns a {@code char} matcher that matches any BMP character present in the given character 310  * sequence. Returns a bogus matcher if the sequence contains supplementary characters. 311  */ 312  public static CharMatcher anyOf(final CharSequence sequence) { 313  switch (sequence.length()) { 314  case 0: 315  return none(); 316  case 1: 317  return is(sequence.charAt(0)); 318  case 2: 319  return isEither(sequence.charAt(0), sequence.charAt(1)); 320  default: 321  // TODO(lowasser): is it potentially worth just going ahead and building a precomputed 322  // matcher? 323  return new AnyOf(sequence); 324  } 325  } 326  327  /** 328  * Returns a {@code char} matcher that matches any BMP character not present in the given 329  * character sequence. Returns a bogus matcher if the sequence contains supplementary characters. 330  */ 331  public static CharMatcher noneOf(CharSequence sequence) { 332  return anyOf(sequence).negate(); 333  } 334  335  /** 336  * Returns a {@code char} matcher that matches any character in a given BMP range (both endpoints 337  * are inclusive). For example, to match any lowercase letter of the English alphabet, use {@code 338  * CharMatcher.inRange('a', 'z')}. 339  * 340  * @throws IllegalArgumentException if {@code endInclusive < startInclusive} 341  */ 342  public static CharMatcher inRange(final char startInclusive, final char endInclusive) { 343  return new InRange(startInclusive, endInclusive); 344  } 345  346  /** 347  * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but 348  * which operates on primitive {@code char} instances instead. 349  */ 350  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) { 351  return predicate instanceof CharMatcher ? (CharMatcher) predicate : new ForPredicate(predicate); 352  } 353  354  // Constructors 355  356  /** 357  * Constructor for use by subclasses. When subclassing, you may want to override {@code 358  * toString()} to provide a useful description. 359  */ 360  protected CharMatcher() {} 361  362  // Abstract methods 363  364  /** Determines a true or false value for the given character. */ 365  public abstract boolean matches(char c); 366  367  // Non-static factories 368  369  /** Returns a matcher that matches any character not matched by this matcher. */ 370  // @Override under Java 8 but not under Java 7 371  @Override 372  public CharMatcher negate() { 373  return new Negated(this); 374  } 375  376  /** 377  * Returns a matcher that matches any character matched by both this matcher and {@code other}. 378  */ 379  public CharMatcher and(CharMatcher other) { 380  return new And(this, other); 381  } 382  383  /** 384  * Returns a matcher that matches any character matched by either this matcher or {@code other}. 385  */ 386  public CharMatcher or(CharMatcher other) { 387  return new Or(this, other); 388  } 389  390  /** 391  * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to 392  * query than the original; your mileage may vary. Precomputation takes time and is likely to be 393  * worthwhile only if the precomputed matcher is queried many thousands of times. 394  * 395  * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a 396  * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a 397  * worthwhile tradeoff in a browser. 398  */ 399  public CharMatcher precomputed() { 400  return Platform.precomputeCharMatcher(this); 401  } 402  403  private static final int DISTINCT_CHARS = Character.MAX_VALUE - Character.MIN_VALUE + 1; 404  405  /** 406  * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method 407  * on {@link Platform} so that we can have different behavior in GWT. 408  * 409  * <p>This implementation tries to be smart in a number of ways. It recognizes cases where the 410  * negation is cheaper to precompute than the matcher itself; it tries to build small hash tables 411  * for matchers that only match a few characters, and so on. In the worst-case scenario, it 412  * constructs an eight-kilobyte bit array and queries that. In many situations this produces a 413  * matcher which is faster to query than the original. 414  */ 415  @GwtIncompatible // SmallCharMatcher 416  CharMatcher precomputedInternal() { 417  final BitSet table = new BitSet(); 418  setBits(table); 419  int totalCharacters = table.cardinality(); 420  if (totalCharacters * 2 <= DISTINCT_CHARS) { 421  return precomputedPositive(totalCharacters, table, toString()); 422  } else { 423  // TODO(lowasser): is it worth it to worry about the last character of large matchers? 424  table.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 425  int negatedCharacters = DISTINCT_CHARS - totalCharacters; 426  String suffix = ".negate()"; 427  final String description = toString(); 428  String negatedDescription = 429  description.endsWith(suffix) 430  ? description.substring(0, description.length() - suffix.length()) 431  : description + suffix; 432  return new NegatedFastMatcher( 433  precomputedPositive(negatedCharacters, table, negatedDescription)) { 434  @Override 435  public String toString() { 436  return description; 437  } 438  }; 439  } 440  } 441  442  /** 443  * Helper method for {@link #precomputedInternal} that doesn't test if the negation is cheaper. 444  */ 445  @GwtIncompatible // SmallCharMatcher 446  private static CharMatcher precomputedPositive( 447  int totalCharacters, BitSet table, String description) { 448  switch (totalCharacters) { 449  case 0: 450  return none(); 451  case 1: 452  return is((char) table.nextSetBit(0)); 453  case 2: 454  char c1 = (char) table.nextSetBit(0); 455  char c2 = (char) table.nextSetBit(c1 + 1); 456  return isEither(c1, c2); 457  default: 458  return isSmall(totalCharacters, table.length()) 459  ? SmallCharMatcher.from(table, description) 460  : new BitSetMatcher(table, description); 461  } 462  } 463  464  @GwtIncompatible // SmallCharMatcher 465  private static boolean isSmall(int totalCharacters, int tableLength) { 466  return totalCharacters <= SmallCharMatcher.MAX_SIZE 467  && tableLength > (totalCharacters * 4 * Character.SIZE); 468  // err on the side of BitSetMatcher 469  } 470  471  /** Sets bits in {@code table} matched by this matcher. */ 472  @GwtIncompatible // used only from other GwtIncompatible code 473  void setBits(BitSet table) { 474  for (int c = Character.MAX_VALUE; c >= Character.MIN_VALUE; c--) { 475  if (matches((char) c)) { 476  table.set(c); 477  } 478  } 479  } 480  481  // Text processing routines 482  483  /** 484  * Returns {@code true} if a character sequence contains at least one matching BMP character. 485  * Equivalent to {@code !matchesNoneOf(sequence)}. 486  * 487  * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 488  * character, until this returns {@code true} or the end is reached. 489  * 490  * @param sequence the character sequence to examine, possibly empty 491  * @return {@code true} if this matcher matches at least one character in the sequence 492  * @since 8.0 493  */ 494  public boolean matchesAnyOf(CharSequence sequence) { 495  return !matchesNoneOf(sequence); 496  } 497  498  /** 499  * Returns {@code true} if a character sequence contains only matching BMP characters. 500  * 501  * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 502  * character, until this returns {@code false} or the end is reached. 503  * 504  * @param sequence the character sequence to examine, possibly empty 505  * @return {@code true} if this matcher matches every character in the sequence, including when 506  * the sequence is empty 507  */ 508  public boolean matchesAllOf(CharSequence sequence) { 509  for (int i = sequence.length() - 1; i >= 0; i--) { 510  if (!matches(sequence.charAt(i))) { 511  return false; 512  } 513  } 514  return true; 515  } 516  517  /** 518  * Returns {@code true} if a character sequence contains no matching BMP characters. Equivalent to 519  * {@code !matchesAnyOf(sequence)}. 520  * 521  * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each 522  * character, until this returns {@code true} or the end is reached. 523  * 524  * @param sequence the character sequence to examine, possibly empty 525  * @return {@code true} if this matcher matches no characters in the sequence, including when the 526  * sequence is empty 527  */ 528  public boolean matchesNoneOf(CharSequence sequence) { 529  return indexIn(sequence) == -1; 530  } 531  532  /** 533  * Returns the index of the first matching BMP character in a character sequence, or {@code -1} if 534  * no matching character is present. 535  * 536  * <p>The default implementation iterates over the sequence in forward order calling {@link 537  * #matches} for each character. 538  * 539  * @param sequence the character sequence to examine from the beginning 540  * @return an index, or {@code -1} if no character matches 541  */ 542  public int indexIn(CharSequence sequence) { 543  return indexIn(sequence, 0); 544  } 545  546  /** 547  * Returns the index of the first matching BMP character in a character sequence, starting from a 548  * given position, or {@code -1} if no character matches after that position. 549  * 550  * <p>The default implementation iterates over the sequence in forward order, beginning at {@code 551  * start}, calling {@link #matches} for each character. 552  * 553  * @param sequence the character sequence to examine 554  * @param start the first index to examine; must be nonnegative and no greater than {@code 555  * sequence.length()} 556  * @return the index of the first matching character, guaranteed to be no less than {@code start}, 557  * or {@code -1} if no character matches 558  * @throws IndexOutOfBoundsException if start is negative or greater than {@code 559  * sequence.length()} 560  */ 561  public int indexIn(CharSequence sequence, int start) { 562  int length = sequence.length(); 563  checkPositionIndex(start, length); 564  for (int i = start; i < length; i++) { 565  if (matches(sequence.charAt(i))) { 566  return i; 567  } 568  } 569  return -1; 570  } 571  572  /** 573  * Returns the index of the last matching BMP character in a character sequence, or {@code -1} if 574  * no matching character is present. 575  * 576  * <p>The default implementation iterates over the sequence in reverse order calling {@link 577  * #matches} for each character. 578  * 579  * @param sequence the character sequence to examine from the end 580  * @return an index, or {@code -1} if no character matches 581  */ 582  public int lastIndexIn(CharSequence sequence) { 583  for (int i = sequence.length() - 1; i >= 0; i--) { 584  if (matches(sequence.charAt(i))) { 585  return i; 586  } 587  } 588  return -1; 589  } 590  591  /** 592  * Returns the number of matching {@code char}s found in a character sequence. 593  * 594  * <p>Counts 2 per supplementary character, such as for {@link #whitespace}().{@link #negate}(). 595  */ 596  public int countIn(CharSequence sequence) { 597  int count = 0; 598  for (int i = 0; i < sequence.length(); i++) { 599  if (matches(sequence.charAt(i))) { 600  count++; 601  } 602  } 603  return count; 604  } 605  606  /** 607  * Returns a string containing all non-matching characters of a character sequence, in order. For 608  * example: 609  * 610  * <pre>{@code 611  * CharMatcher.is('a').removeFrom("bazaar") 612  * }</pre> 613  * 614  * ... returns {@code "bzr"}. 615  */ 616  public String removeFrom(CharSequence sequence) { 617  String string = sequence.toString(); 618  int pos = indexIn(string); 619  if (pos == -1) { 620  return string; 621  } 622  623  char[] chars = string.toCharArray(); 624  int spread = 1; 625  626  // This unusual loop comes from extensive benchmarking 627  OUT: 628  while (true) { 629  pos++; 630  while (true) { 631  if (pos == chars.length) { 632  break OUT; 633  } 634  if (matches(chars[pos])) { 635  break; 636  } 637  chars[pos - spread] = chars[pos]; 638  pos++; 639  } 640  spread++; 641  } 642  return new String(chars, 0, pos - spread); 643  } 644  645  /** 646  * Returns a string containing all matching BMP characters of a character sequence, in order. For 647  * example: 648  * 649  * <pre>{@code 650  * CharMatcher.is('a').retainFrom("bazaar") 651  * }</pre> 652  * 653  * ... returns {@code "aaa"}. 654  */ 655  public String retainFrom(CharSequence sequence) { 656  return negate().removeFrom(sequence); 657  } 658  659  /** 660  * Returns a string copy of the input character sequence, with each matching BMP character 661  * replaced by a given replacement character. For example: 662  * 663  * <pre>{@code 664  * CharMatcher.is('a').replaceFrom("radar", 'o') 665  * }</pre> 666  * 667  * ... returns {@code "rodor"}. 668  * 669  * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 670  * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 671  * character. 672  * 673  * @param sequence the character sequence to replace matching characters in 674  * @param replacement the character to append to the result string in place of each matching 675  * character in {@code sequence} 676  * @return the new string 677  */ 678  public String replaceFrom(CharSequence sequence, char replacement) { 679  String string = sequence.toString(); 680  int pos = indexIn(string); 681  if (pos == -1) { 682  return string; 683  } 684  char[] chars = string.toCharArray(); 685  chars[pos] = replacement; 686  for (int i = pos + 1; i < chars.length; i++) { 687  if (matches(chars[i])) { 688  chars[i] = replacement; 689  } 690  } 691  return new String(chars); 692  } 693  694  /** 695  * Returns a string copy of the input character sequence, with each matching BMP character 696  * replaced by a given replacement sequence. For example: 697  * 698  * <pre>{@code 699  * CharMatcher.is('a').replaceFrom("yaha", "oo") 700  * }</pre> 701  * 702  * ... returns {@code "yoohoo"}. 703  * 704  * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better 705  * off calling {@link #replaceFrom(CharSequence, char)} directly. 706  * 707  * @param sequence the character sequence to replace matching characters in 708  * @param replacement the characters to append to the result string in place of each matching 709  * character in {@code sequence} 710  * @return the new string 711  */ 712  public String replaceFrom(CharSequence sequence, CharSequence replacement) { 713  int replacementLen = replacement.length(); 714  if (replacementLen == 0) { 715  return removeFrom(sequence); 716  } 717  if (replacementLen == 1) { 718  return replaceFrom(sequence, replacement.charAt(0)); 719  } 720  721  String string = sequence.toString(); 722  int pos = indexIn(string); 723  if (pos == -1) { 724  return string; 725  } 726  727  int len = string.length(); 728  StringBuilder buf = new StringBuilder((len * 3 / 2) + 16); 729  730  int oldpos = 0; 731  do { 732  buf.append(string, oldpos, pos); 733  buf.append(replacement); 734  oldpos = pos + 1; 735  pos = indexIn(string, oldpos); 736  } while (pos != -1); 737  738  buf.append(string, oldpos, len); 739  return buf.toString(); 740  } 741  742  /** 743  * Returns a substring of the input character sequence that omits all matching BMP characters from 744  * the beginning and from the end of the string. For example: 745  * 746  * <pre>{@code 747  * CharMatcher.anyOf("ab").trimFrom("abacatbab") 748  * }</pre> 749  * 750  * ... returns {@code "cat"}. 751  * 752  * <p>Note that: 753  * 754  * <pre>{@code 755  * CharMatcher.inRange('\0', ' ').trimFrom(str) 756  * }</pre> 757  * 758  * ... is equivalent to {@link String#trim()}. 759  */ 760  public String trimFrom(CharSequence sequence) { 761  int len = sequence.length(); 762  int first; 763  int last; 764  765  for (first = 0; first < len; first++) { 766  if (!matches(sequence.charAt(first))) { 767  break; 768  } 769  } 770  for (last = len - 1; last > first; last--) { 771  if (!matches(sequence.charAt(last))) { 772  break; 773  } 774  } 775  776  return sequence.subSequence(first, last + 1).toString(); 777  } 778  779  /** 780  * Returns a substring of the input character sequence that omits all matching BMP characters from 781  * the beginning of the string. For example: 782  * 783  * <pre>{@code 784  * CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab") 785  * }</pre> 786  * 787  * ... returns {@code "catbab"}. 788  */ 789  public String trimLeadingFrom(CharSequence sequence) { 790  int len = sequence.length(); 791  for (int first = 0; first < len; first++) { 792  if (!matches(sequence.charAt(first))) { 793  return sequence.subSequence(first, len).toString(); 794  } 795  } 796  return ""; 797  } 798  799  /** 800  * Returns a substring of the input character sequence that omits all matching BMP characters from 801  * the end of the string. For example: 802  * 803  * <pre>{@code 804  * CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab") 805  * }</pre> 806  * 807  * ... returns {@code "abacat"}. 808  */ 809  public String trimTrailingFrom(CharSequence sequence) { 810  int len = sequence.length(); 811  for (int last = len - 1; last >= 0; last--) { 812  if (!matches(sequence.charAt(last))) { 813  return sequence.subSequence(0, last + 1).toString(); 814  } 815  } 816  return ""; 817  } 818  819  /** 820  * Returns a string copy of the input character sequence, with each group of consecutive matching 821  * BMP characters replaced by a single replacement character. For example: 822  * 823  * <pre>{@code 824  * CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-') 825  * }</pre> 826  * 827  * ... returns {@code "b-p-r"}. 828  * 829  * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching 830  * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each 831  * character. 832  * 833  * @param sequence the character sequence to replace matching groups of characters in 834  * @param replacement the character to append to the result string in place of each group of 835  * matching characters in {@code sequence} 836  * @return the new string 837  */ 838  public String collapseFrom(CharSequence sequence, char replacement) { 839  // This implementation avoids unnecessary allocation. 840  int len = sequence.length(); 841  for (int i = 0; i < len; i++) { 842  char c = sequence.charAt(i); 843  if (matches(c)) { 844  if (c == replacement && (i == len - 1 || !matches(sequence.charAt(i + 1)))) { 845  // a no-op replacement 846  i++; 847  } else { 848  StringBuilder builder = new StringBuilder(len).append(sequence, 0, i).append(replacement); 849  return finishCollapseFrom(sequence, i + 1, len, replacement, builder, true); 850  } 851  } 852  } 853  // no replacement needed 854  return sequence.toString(); 855  } 856  857  /** 858  * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that 859  * groups of matching BMP characters at the start or end of the sequence are removed without 860  * replacement. 861  */ 862  public String trimAndCollapseFrom(CharSequence sequence, char replacement) { 863  // This implementation avoids unnecessary allocation. 864  int len = sequence.length(); 865  int first = 0; 866  int last = len - 1; 867  868  while (first < len && matches(sequence.charAt(first))) { 869  first++; 870  } 871  872  while (last > first && matches(sequence.charAt(last))) { 873  last--; 874  } 875  876  return (first == 0 && last == len - 1) 877  ? collapseFrom(sequence, replacement) 878  : finishCollapseFrom( 879  sequence, first, last + 1, replacement, new StringBuilder(last + 1 - first), false); 880  } 881  882  private String finishCollapseFrom( 883  CharSequence sequence, 884  int start, 885  int end, 886  char replacement, 887  StringBuilder builder, 888  boolean inMatchingGroup) { 889  for (int i = start; i < end; i++) { 890  char c = sequence.charAt(i); 891  if (matches(c)) { 892  if (!inMatchingGroup) { 893  builder.append(replacement); 894  inMatchingGroup = true; 895  } 896  } else { 897  builder.append(c); 898  inMatchingGroup = false; 899  } 900  } 901  return builder.toString(); 902  } 903  904  /** 905  * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #matches} 906  * instead. 907  */ 908  @Deprecated 909  @Override 910  public boolean apply(Character character) { 911  return matches(character); 912  } 913  914  /** 915  * Returns a string representation of this {@code CharMatcher}, such as {@code 916  * CharMatcher.or(WHITESPACE, JAVA_DIGIT)}. 917  */ 918  @Override 919  public String toString() { 920  return super.toString(); 921  } 922  923  /** 924  * Returns the Java Unicode escape sequence for the given {@code char}, in the form "\u12AB" where 925  * "12AB" is the four hexadecimal digits representing the 16-bit code unit. 926  */ 927  private static String showCharacter(char c) { 928  String hex = "0123456789ABCDEF"; 929  char[] tmp = {'\\', 'u', '\0', '\0', '\0', '\0'}; 930  for (int i = 0; i < 4; i++) { 931  tmp[5 - i] = hex.charAt(c & 0xF); 932  c = (char) (c >> 4); 933  } 934  return String.copyValueOf(tmp); 935  } 936  937  // Fast matchers 938  939  /** A matcher for which precomputation will not yield any significant benefit. */ 940  abstract static class FastMatcher extends CharMatcher { 941  942  @Override 943  public final CharMatcher precomputed() { 944  return this; 945  } 946  947  @Override 948  public CharMatcher negate() { 949  return new NegatedFastMatcher(this); 950  } 951  } 952  953  /** {@link FastMatcher} which overrides {@code toString()} with a custom name. */ 954  abstract static class NamedFastMatcher extends FastMatcher { 955  956  private final String description; 957  958  NamedFastMatcher(String description) { 959  this.description = checkNotNull(description); 960  } 961  962  @Override 963  public final String toString() { 964  return description; 965  } 966  } 967  968  /** Negation of a {@link FastMatcher}. */ 969  static class NegatedFastMatcher extends Negated { 970  971  NegatedFastMatcher(CharMatcher original) { 972  super(original); 973  } 974  975  @Override 976  public final CharMatcher precomputed() { 977  return this; 978  } 979  } 980  981  /** Fast matcher using a {@link BitSet} table of matching characters. */ 982  @GwtIncompatible // used only from other GwtIncompatible code 983  private static final class BitSetMatcher extends NamedFastMatcher { 984  985  private final BitSet table; 986  987  private BitSetMatcher(BitSet table, String description) { 988  super(description); 989  if (table.length() + Long.SIZE < table.size()) { 990  table = (BitSet) table.clone(); 991  // If only we could actually call BitSet.trimToSize() ourselves... 992  } 993  this.table = table; 994  } 995  996  @Override 997  public boolean matches(char c) { 998  return table.get(c); 999  } 1000  1001  @Override 1002  void setBits(BitSet bitSet) { 1003  bitSet.or(table); 1004  } 1005  } 1006  1007  // Static constant implementation classes 1008  1009  /** Implementation of {@link #any()}. */ 1010  private static final class Any extends NamedFastMatcher { 1011  1012  static final Any INSTANCE = new Any(); 1013  1014  private Any() { 1015  super("CharMatcher.any()"); 1016  } 1017  1018  @Override 1019  public boolean matches(char c) { 1020  return true; 1021  } 1022  1023  @Override 1024  public int indexIn(CharSequence sequence) { 1025  return (sequence.length() == 0) ? -1 : 0; 1026  } 1027  1028  @Override 1029  public int indexIn(CharSequence sequence, int start) { 1030  int length = sequence.length(); 1031  checkPositionIndex(start, length); 1032  return (start == length) ? -1 : start; 1033  } 1034  1035  @Override 1036  public int lastIndexIn(CharSequence sequence) { 1037  return sequence.length() - 1; 1038  } 1039  1040  @Override 1041  public boolean matchesAllOf(CharSequence sequence) { 1042  checkNotNull(sequence); 1043  return true; 1044  } 1045  1046  @Override 1047  public boolean matchesNoneOf(CharSequence sequence) { 1048  return sequence.length() == 0; 1049  } 1050  1051  @Override 1052  public String removeFrom(CharSequence sequence) { 1053  checkNotNull(sequence); 1054  return ""; 1055  } 1056  1057  @Override 1058  public String replaceFrom(CharSequence sequence, char replacement) { 1059  char[] array = new char[sequence.length()]; 1060  Arrays.fill(array, replacement); 1061  return new String(array); 1062  } 1063  1064  @Override 1065  public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1066  StringBuilder result = new StringBuilder(sequence.length() * replacement.length()); 1067  for (int i = 0; i < sequence.length(); i++) { 1068  result.append(replacement); 1069  } 1070  return result.toString(); 1071  } 1072  1073  @Override 1074  public String collapseFrom(CharSequence sequence, char replacement) { 1075  return (sequence.length() == 0) ? "" : String.valueOf(replacement); 1076  } 1077  1078  @Override 1079  public String trimFrom(CharSequence sequence) { 1080  checkNotNull(sequence); 1081  return ""; 1082  } 1083  1084  @Override 1085  public int countIn(CharSequence sequence) { 1086  return sequence.length(); 1087  } 1088  1089  @Override 1090  public CharMatcher and(CharMatcher other) { 1091  return checkNotNull(other); 1092  } 1093  1094  @Override 1095  public CharMatcher or(CharMatcher other) { 1096  checkNotNull(other); 1097  return this; 1098  } 1099  1100  @Override 1101  public CharMatcher negate() { 1102  return none(); 1103  } 1104  } 1105  1106  /** Implementation of {@link #none()}. */ 1107  private static final class None extends NamedFastMatcher { 1108  1109  static final None INSTANCE = new None(); 1110  1111  private None() { 1112  super("CharMatcher.none()"); 1113  } 1114  1115  @Override 1116  public boolean matches(char c) { 1117  return false; 1118  } 1119  1120  @Override 1121  public int indexIn(CharSequence sequence) { 1122  checkNotNull(sequence); 1123  return -1; 1124  } 1125  1126  @Override 1127  public int indexIn(CharSequence sequence, int start) { 1128  int length = sequence.length(); 1129  checkPositionIndex(start, length); 1130  return -1; 1131  } 1132  1133  @Override 1134  public int lastIndexIn(CharSequence sequence) { 1135  checkNotNull(sequence); 1136  return -1; 1137  } 1138  1139  @Override 1140  public boolean matchesAllOf(CharSequence sequence) { 1141  return sequence.length() == 0; 1142  } 1143  1144  @Override 1145  public boolean matchesNoneOf(CharSequence sequence) { 1146  checkNotNull(sequence); 1147  return true; 1148  } 1149  1150  @Override 1151  public String removeFrom(CharSequence sequence) { 1152  return sequence.toString(); 1153  } 1154  1155  @Override 1156  public String replaceFrom(CharSequence sequence, char replacement) { 1157  return sequence.toString(); 1158  } 1159  1160  @Override 1161  public String replaceFrom(CharSequence sequence, CharSequence replacement) { 1162  checkNotNull(replacement); 1163  return sequence.toString(); 1164  } 1165  1166  @Override 1167  public String collapseFrom(CharSequence sequence, char replacement) { 1168  return sequence.toString(); 1169  } 1170  1171  @Override 1172  public String trimFrom(CharSequence sequence) { 1173  return sequence.toString(); 1174  } 1175  1176  @Override 1177  public String trimLeadingFrom(CharSequence sequence) { 1178  return sequence.toString(); 1179  } 1180  1181  @Override 1182  public String trimTrailingFrom(CharSequence sequence) { 1183  return sequence.toString(); 1184  } 1185  1186  @Override 1187  public int countIn(CharSequence sequence) { 1188  checkNotNull(sequence); 1189  return 0; 1190  } 1191  1192  @Override 1193  public CharMatcher and(CharMatcher other) { 1194  checkNotNull(other); 1195  return this; 1196  } 1197  1198  @Override 1199  public CharMatcher or(CharMatcher other) { 1200  return checkNotNull(other); 1201  } 1202  1203  @Override 1204  public CharMatcher negate() { 1205  return any(); 1206  } 1207  } 1208  1209  /** Implementation of {@link #whitespace()}. */ 1210  @VisibleForTesting 1211  static final class Whitespace extends NamedFastMatcher { 1212  1213  // TABLE is a precomputed hashset of whitespace characters. MULTIPLIER serves as a hash function 1214  // whose key property is that it maps 25 characters into the 32-slot table without collision. 1215  // Basically this is an opportunistic fast implementation as opposed to "good code". For most 1216  // other use-cases, the reduction in readability isn't worth it. 1217  static final String TABLE = 1218  "\u2002\u3000\r\u0085\u200A\u2005\u2000\u3000" 1219  + "\u2029\u000B\u3000\u2008\u2003\u205F\u3000\u1680" 1220  + "\u0009\u0020\u2006\u2001\u202F\u00A0\u000C\u2009" 1221  + "\u3000\u2004\u3000\u3000\u2028\n\u2007\u3000"; 1222  static final int MULTIPLIER = 1682554634; 1223  static final int SHIFT = Integer.numberOfLeadingZeros(TABLE.length() - 1); 1224  1225  static final Whitespace INSTANCE = new Whitespace(); 1226  1227  Whitespace() { 1228  super("CharMatcher.whitespace()"); 1229  } 1230  1231  @Override 1232  public boolean matches(char c) { 1233  return TABLE.charAt((MULTIPLIER * c) >>> SHIFT) == c; 1234  } 1235  1236  @GwtIncompatible // used only from other GwtIncompatible code 1237  @Override 1238  void setBits(BitSet table) { 1239  for (int i = 0; i < TABLE.length(); i++) { 1240  table.set(TABLE.charAt(i)); 1241  } 1242  } 1243  } 1244  1245  /** Implementation of {@link #breakingWhitespace()}. */ 1246  private static final class BreakingWhitespace extends CharMatcher { 1247  1248  static final CharMatcher INSTANCE = new BreakingWhitespace(); 1249  1250  @Override 1251  public boolean matches(char c) { 1252  switch (c) { 1253  case '\t': 1254  case '\n': 1255  case '\013': 1256  case '\f': 1257  case '\r': 1258  case ' ': 1259  case '\u0085': 1260  case '\u1680': 1261  case '\u2028': 1262  case '\u2029': 1263  case '\u205f': 1264  case '\u3000': 1265  return true; 1266  case '\u2007': 1267  return false; 1268  default: 1269  return c >= '\u2000' && c <= '\u200a'; 1270  } 1271  } 1272  1273  @Override 1274  public String toString() { 1275  return "CharMatcher.breakingWhitespace()"; 1276  } 1277  } 1278  1279  /** Implementation of {@link #ascii()}. */ 1280  private static final class Ascii extends NamedFastMatcher { 1281  1282  static final Ascii INSTANCE = new Ascii(); 1283  1284  Ascii() { 1285  super("CharMatcher.ascii()"); 1286  } 1287  1288  @Override 1289  public boolean matches(char c) { 1290  return c <= '\u007f'; 1291  } 1292  } 1293  1294  /** Implementation that matches characters that fall within multiple ranges. */ 1295  private static class RangesMatcher extends CharMatcher { 1296  1297  private final String description; 1298  private final char[] rangeStarts; 1299  private final char[] rangeEnds; 1300  1301  RangesMatcher(String description, char[] rangeStarts, char[] rangeEnds) { 1302  this.description = description; 1303  this.rangeStarts = rangeStarts; 1304  this.rangeEnds = rangeEnds; 1305  checkArgument(rangeStarts.length == rangeEnds.length); 1306  for (int i = 0; i < rangeStarts.length; i++) { 1307  checkArgument(rangeStarts[i] <= rangeEnds[i]); 1308  if (i + 1 < rangeStarts.length) { 1309  checkArgument(rangeEnds[i] < rangeStarts[i + 1]); 1310  } 1311  } 1312  } 1313  1314  @Override 1315  public boolean matches(char c) { 1316  int index = Arrays.binarySearch(rangeStarts, c); 1317  if (index >= 0) { 1318  return true; 1319  } else { 1320  index = ~index - 1; 1321  return index >= 0 && c <= rangeEnds[index]; 1322  } 1323  } 1324  1325  @Override 1326  public String toString() { 1327  return description; 1328  } 1329  } 1330  1331  /** Implementation of {@link #digit()}. */ 1332  private static final class Digit extends RangesMatcher { 1333  // Plug the following UnicodeSet pattern into 1334  // https://unicode.org/cldr/utility/list-unicodeset.jsp 1335  // [[:Nd:]&[:nv=0:]&[\u0000-\uFFFF]] 1336  // and get the zeroes from there. 1337  1338  // Must be in ascending order. 1339  private static final String ZEROES = 1340  "0\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66\u0ce6\u0d66\u0de6" 1341  + "\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946\u19d0\u1a80\u1a90\u1b50\u1bb0" 1342  + "\u1c40\u1c50\ua620\ua8d0\ua900\ua9d0\ua9f0\uaa50\uabf0\uff10"; 1343  1344  private static char[] zeroes() { 1345  return ZEROES.toCharArray(); 1346  } 1347  1348  private static char[] nines() { 1349  char[] nines = new char[ZEROES.length()]; 1350  for (int i = 0; i < ZEROES.length(); i++) { 1351  nines[i] = (char) (ZEROES.charAt(i) + 9); 1352  } 1353  return nines; 1354  } 1355  1356  static final Digit INSTANCE = new Digit(); 1357  1358  private Digit() { 1359  super("CharMatcher.digit()", zeroes(), nines()); 1360  } 1361  } 1362  1363  /** Implementation of {@link #javaDigit()}. */ 1364  private static final class JavaDigit extends CharMatcher { 1365  1366  static final JavaDigit INSTANCE = new JavaDigit(); 1367  1368  @Override 1369  public boolean matches(char c) { 1370  return Character.isDigit(c); 1371  } 1372  1373  @Override 1374  public String toString() { 1375  return "CharMatcher.javaDigit()"; 1376  } 1377  } 1378  1379  /** Implementation of {@link #javaLetter()}. */ 1380  private static final class JavaLetter extends CharMatcher { 1381  1382  static final JavaLetter INSTANCE = new JavaLetter(); 1383  1384  @Override 1385  public boolean matches(char c) { 1386  return Character.isLetter(c); 1387  } 1388  1389  @Override 1390  public String toString() { 1391  return "CharMatcher.javaLetter()"; 1392  } 1393  } 1394  1395  /** Implementation of {@link #javaLetterOrDigit()}. */ 1396  private static final class JavaLetterOrDigit extends CharMatcher { 1397  1398  static final JavaLetterOrDigit INSTANCE = new JavaLetterOrDigit(); 1399  1400  @Override 1401  public boolean matches(char c) { 1402  return Character.isLetterOrDigit(c); 1403  } 1404  1405  @Override 1406  public String toString() { 1407  return "CharMatcher.javaLetterOrDigit()"; 1408  } 1409  } 1410  1411  /** Implementation of {@link #javaUpperCase()}. */ 1412  private static final class JavaUpperCase extends CharMatcher { 1413  1414  static final JavaUpperCase INSTANCE = new JavaUpperCase(); 1415  1416  @Override 1417  public boolean matches(char c) { 1418  return Character.isUpperCase(c); 1419  } 1420  1421  @Override 1422  public String toString() { 1423  return "CharMatcher.javaUpperCase()"; 1424  } 1425  } 1426  1427  /** Implementation of {@link #javaLowerCase()}. */ 1428  private static final class JavaLowerCase extends CharMatcher { 1429  1430  static final JavaLowerCase INSTANCE = new JavaLowerCase(); 1431  1432  @Override 1433  public boolean matches(char c) { 1434  return Character.isLowerCase(c); 1435  } 1436  1437  @Override 1438  public String toString() { 1439  return "CharMatcher.javaLowerCase()"; 1440  } 1441  } 1442  1443  /** Implementation of {@link #javaIsoControl()}. */ 1444  private static final class JavaIsoControl extends NamedFastMatcher { 1445  1446  static final JavaIsoControl INSTANCE = new JavaIsoControl(); 1447  1448  private JavaIsoControl() { 1449  super("CharMatcher.javaIsoControl()"); 1450  } 1451  1452  @Override 1453  public boolean matches(char c) { 1454  return c <= '\u001f' || (c >= '\u007f' && c <= '\u009f'); 1455  } 1456  } 1457  1458  /** Implementation of {@link #invisible()}. */ 1459  private static final class Invisible extends RangesMatcher { 1460  // Plug the following UnicodeSet pattern into 1461  // https://unicode.org/cldr/utility/list-unicodeset.jsp 1462  // [[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:][:Cs:][:Co:]]&[\u0000-\uFFFF]] 1463  // with the "Abbreviate" option, and get the ranges from there. 1464  private static final String RANGE_STARTS = 1465  "\u0000\u007f\u00ad\u0600\u061c\u06dd\u070f\u08e2\u1680\u180e\u2000\u2028\u205f\u2066" 1466  + "\u3000\ud800\ufeff\ufff9"; 1467  private static final String RANGE_ENDS = // inclusive ends 1468  "\u0020\u00a0\u00ad\u0605\u061c\u06dd\u070f\u08e2\u1680\u180e\u200f\u202f\u2064\u206f" 1469  + "\u3000\uf8ff\ufeff\ufffb"; 1470  1471  static final Invisible INSTANCE = new Invisible(); 1472  1473  private Invisible() { 1474  super("CharMatcher.invisible()", RANGE_STARTS.toCharArray(), RANGE_ENDS.toCharArray()); 1475  } 1476  } 1477  1478  /** Implementation of {@link #singleWidth()}. */ 1479  private static final class SingleWidth extends RangesMatcher { 1480  1481  static final SingleWidth INSTANCE = new SingleWidth(); 1482  1483  private SingleWidth() { 1484  super( 1485  "CharMatcher.singleWidth()", 1486  "\u0000\u05be\u05d0\u05f3\u0600\u0750\u0e00\u1e00\u2100\ufb50\ufe70\uff61".toCharArray(), 1487  "\u04f9\u05be\u05ea\u05f4\u06ff\u077f\u0e7f\u20af\u213a\ufdff\ufeff\uffdc".toCharArray()); 1488  } 1489  } 1490  1491  // Non-static factory implementation classes 1492  1493  /** Implementation of {@link #negate()}. */ 1494  private static class Negated extends CharMatcher { 1495  1496  final CharMatcher original; 1497  1498  Negated(CharMatcher original) { 1499  this.original = checkNotNull(original); 1500  } 1501  1502  @Override 1503  public boolean matches(char c) { 1504  return !original.matches(c); 1505  } 1506  1507  @Override 1508  public boolean matchesAllOf(CharSequence sequence) { 1509  return original.matchesNoneOf(sequence); 1510  } 1511  1512  @Override 1513  public boolean matchesNoneOf(CharSequence sequence) { 1514  return original.matchesAllOf(sequence); 1515  } 1516  1517  @Override 1518  public int countIn(CharSequence sequence) { 1519  return sequence.length() - original.countIn(sequence); 1520  } 1521  1522  @GwtIncompatible // used only from other GwtIncompatible code 1523  @Override 1524  void setBits(BitSet table) { 1525  BitSet tmp = new BitSet(); 1526  original.setBits(tmp); 1527  tmp.flip(Character.MIN_VALUE, Character.MAX_VALUE + 1); 1528  table.or(tmp); 1529  } 1530  1531  @Override 1532  public CharMatcher negate() { 1533  return original; 1534  } 1535  1536  @Override 1537  public String toString() { 1538  return original + ".negate()"; 1539  } 1540  } 1541  1542  /** Implementation of {@link #and(CharMatcher)}. */ 1543  private static final class And extends CharMatcher { 1544  1545  final CharMatcher first; 1546  final CharMatcher second; 1547  1548  And(CharMatcher a, CharMatcher b) { 1549  first = checkNotNull(a); 1550  second = checkNotNull(b); 1551  } 1552  1553  @Override 1554  public boolean matches(char c) { 1555  return first.matches(c) && second.matches(c); 1556  } 1557  1558  @GwtIncompatible // used only from other GwtIncompatible code 1559  @Override 1560  void setBits(BitSet table) { 1561  BitSet tmp1 = new BitSet(); 1562  first.setBits(tmp1); 1563  BitSet tmp2 = new BitSet(); 1564  second.setBits(tmp2); 1565  tmp1.and(tmp2); 1566  table.or(tmp1); 1567  } 1568  1569  @Override 1570  public String toString() { 1571  return "CharMatcher.and(" + first + ", " + second + ")"; 1572  } 1573  } 1574  1575  /** Implementation of {@link #or(CharMatcher)}. */ 1576  private static final class Or extends CharMatcher { 1577  1578  final CharMatcher first; 1579  final CharMatcher second; 1580  1581  Or(CharMatcher a, CharMatcher b) { 1582  first = checkNotNull(a); 1583  second = checkNotNull(b); 1584  } 1585  1586  @GwtIncompatible // used only from other GwtIncompatible code 1587  @Override 1588  void setBits(BitSet table) { 1589  first.setBits(table); 1590  second.setBits(table); 1591  } 1592  1593  @Override 1594  public boolean matches(char c) { 1595  return first.matches(c) || second.matches(c); 1596  } 1597  1598  @Override 1599  public String toString() { 1600  return "CharMatcher.or(" + first + ", " + second + ")"; 1601  } 1602  } 1603  1604  // Static factory implementations 1605  1606  /** Implementation of {@link #is(char)}. */ 1607  private static final class Is extends FastMatcher { 1608  1609  private final char match; 1610  1611  Is(char match) { 1612  this.match = match; 1613  } 1614  1615  @Override 1616  public boolean matches(char c) { 1617  return c == match; 1618  } 1619  1620  @Override 1621  public String replaceFrom(CharSequence sequence, char replacement) { 1622  return sequence.toString().replace(match, replacement); 1623  } 1624  1625  @Override 1626  public CharMatcher and(CharMatcher other) { 1627  return other.matches(match) ? this : none(); 1628  } 1629  1630  @Override 1631  public CharMatcher or(CharMatcher other) { 1632  return other.matches(match) ? other : super.or(other); 1633  } 1634  1635  @Override 1636  public CharMatcher negate() { 1637  return isNot(match); 1638  } 1639  1640  @GwtIncompatible // used only from other GwtIncompatible code 1641  @Override 1642  void setBits(BitSet table) { 1643  table.set(match); 1644  } 1645  1646  @Override 1647  public String toString() { 1648  return "CharMatcher.is('" + showCharacter(match) + "')"; 1649  } 1650  } 1651  1652  /** Implementation of {@link #isNot(char)}. */ 1653  private static final class IsNot extends FastMatcher { 1654  1655  private final char match; 1656  1657  IsNot(char match) { 1658  this.match = match; 1659  } 1660  1661  @Override 1662  public boolean matches(char c) { 1663  return c != match; 1664  } 1665  1666  @Override 1667  public CharMatcher and(CharMatcher other) { 1668  return other.matches(match) ? super.and(other) : other; 1669  } 1670  1671  @Override 1672  public CharMatcher or(CharMatcher other) { 1673  return other.matches(match) ? any() : this; 1674  } 1675  1676  @GwtIncompatible // used only from other GwtIncompatible code 1677  @Override 1678  void setBits(BitSet table) { 1679  table.set(0, match); 1680  table.set(match + 1, Character.MAX_VALUE + 1); 1681  } 1682  1683  @Override 1684  public CharMatcher negate() { 1685  return is(match); 1686  } 1687  1688  @Override 1689  public String toString() { 1690  return "CharMatcher.isNot('" + showCharacter(match) + "')"; 1691  } 1692  } 1693  1694  private static CharMatcher.IsEither isEither(char c1, char c2) { 1695  return new CharMatcher.IsEither(c1, c2); 1696  } 1697  1698  /** Implementation of {@link #anyOf(CharSequence)} for exactly two characters. */ 1699  private static final class IsEither extends FastMatcher { 1700  1701  private final char match1; 1702  private final char match2; 1703  1704  IsEither(char match1, char match2) { 1705  this.match1 = match1; 1706  this.match2 = match2; 1707  } 1708  1709  @Override 1710  public boolean matches(char c) { 1711  return c == match1 || c == match2; 1712  } 1713  1714  @GwtIncompatible // used only from other GwtIncompatible code 1715  @Override 1716  void setBits(BitSet table) { 1717  table.set(match1); 1718  table.set(match2); 1719  } 1720  1721  @Override 1722  public String toString() { 1723  return "CharMatcher.anyOf(\"" + showCharacter(match1) + showCharacter(match2) + "\")"; 1724  } 1725  } 1726  1727  /** Implementation of {@link #anyOf(CharSequence)} for three or more characters. */ 1728  private static final class AnyOf extends CharMatcher { 1729  1730  private final char[] chars; 1731  1732  public AnyOf(CharSequence chars) { 1733  this.chars = chars.toString().toCharArray(); 1734  Arrays.sort(this.chars); 1735  } 1736  1737  @Override 1738  public boolean matches(char c) { 1739  return Arrays.binarySearch(chars, c) >= 0; 1740  } 1741  1742  @Override 1743  @GwtIncompatible // used only from other GwtIncompatible code 1744  void setBits(BitSet table) { 1745  for (char c : chars) { 1746  table.set(c); 1747  } 1748  } 1749  1750  @Override 1751  public String toString() { 1752  StringBuilder description = new StringBuilder("CharMatcher.anyOf(\""); 1753  for (char c : chars) { 1754  description.append(showCharacter(c)); 1755  } 1756  description.append("\")"); 1757  return description.toString(); 1758  } 1759  } 1760  1761  /** Implementation of {@link #inRange(char, char)}. */ 1762  private static final class InRange extends FastMatcher { 1763  1764  private final char startInclusive; 1765  private final char endInclusive; 1766  1767  InRange(char startInclusive, char endInclusive) { 1768  checkArgument(endInclusive >= startInclusive); 1769  this.startInclusive = startInclusive; 1770  this.endInclusive = endInclusive; 1771  } 1772  1773  @Override 1774  public boolean matches(char c) { 1775  return startInclusive <= c && c <= endInclusive; 1776  } 1777  1778  @GwtIncompatible // used only from other GwtIncompatible code 1779  @Override 1780  void setBits(BitSet table) { 1781  table.set(startInclusive, endInclusive + 1); 1782  } 1783  1784  @Override 1785  public String toString() { 1786  return "CharMatcher.inRange('" 1787  + showCharacter(startInclusive) 1788  + "', '" 1789  + showCharacter(endInclusive) 1790  + "')"; 1791  } 1792  } 1793  1794  /** Implementation of {@link #forPredicate(Predicate)}. */ 1795  private static final class ForPredicate extends CharMatcher { 1796  1797  private final Predicate<? super Character> predicate; 1798  1799  ForPredicate(Predicate<? super Character> predicate) { 1800  this.predicate = checkNotNull(predicate); 1801  } 1802  1803  @Override 1804  public boolean matches(char c) { 1805  return predicate.apply(c); 1806  } 1807  1808  @SuppressWarnings("deprecation") // intentional; deprecation is for callers primarily 1809  @Override 1810  public boolean apply(Character character) { 1811  return predicate.apply(checkNotNull(character)); 1812  } 1813  1814  @Override 1815  public String toString() { 1816  return "CharMatcher.forPredicate(" + predicate + ")"; 1817  } 1818  } 1819 }