Coverage Summary for Class: Escapers (com.google.common.escape)

Class Method, % Line, %
Escapers 0% (0/9) 0% (0/15)
Escapers$1 0% (0/3) 0% (0/3)
Escapers$2 0% (0/2) 0% (0/21)
Escapers$Builder 0% (0/6) 0% (0/14)
Escapers$Builder$1 0% (0/2) 0% (0/3)
Total 0% (0/22) 0% (0/56)


1 /* 2  * Copyright (C) 2009 The Guava Authors 3  * 4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5  * in compliance with the License. You may obtain a copy of the License at 6  * 7  * http://www.apache.org/licenses/LICENSE-2.0 8  * 9  * Unless required by applicable law or agreed to in writing, software distributed under the License 10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11  * or implied. See the License for the specific language governing permissions and limitations under 12  * the License. 13  */ 14  15 package com.google.common.escape; 16  17 import static com.google.common.base.Preconditions.checkNotNull; 18  19 import com.google.common.annotations.Beta; 20 import com.google.common.annotations.GwtCompatible; 21 import com.google.errorprone.annotations.CanIgnoreReturnValue; 22 import java.util.HashMap; 23 import java.util.Map; 24 import javax.annotation.CheckForNull; 25 import org.checkerframework.checker.nullness.qual.Nullable; 26  27 /** 28  * Static utility methods pertaining to {@link Escaper} instances. 29  * 30  * @author Sven Mawson 31  * @author David Beaumont 32  * @since 15.0 33  */ 34 @Beta 35 @GwtCompatible 36 @ElementTypesAreNonnullByDefault 37 public final class Escapers { 38  private Escapers() {} 39  40  /** 41  * Returns an {@link Escaper} that does no escaping, passing all character data through unchanged. 42  */ 43  public static Escaper nullEscaper() { 44  return NULL_ESCAPER; 45  } 46  47  // An Escaper that efficiently performs no escaping. 48  // Extending CharEscaper (instead of Escaper) makes Escapers.compose() easier. 49  private static final Escaper NULL_ESCAPER = 50  new CharEscaper() { 51  @Override 52  public String escape(String string) { 53  return checkNotNull(string); 54  } 55  56  @Override 57  @CheckForNull 58  protected char[] escape(char c) { 59  // TODO: Fix tests not to call this directly and make it throw an error. 60  return null; 61  } 62  }; 63  64  /** 65  * Returns a builder for creating simple, fast escapers. A builder instance can be reused and each 66  * escaper that is created will be a snapshot of the current builder state. Builders are not 67  * thread safe. 68  * 69  * <p>The initial state of the builder is such that: 70  * 71  * <ul> 72  * <li>There are no replacement mappings 73  * <li>{@code safeMin == Character.MIN_VALUE} 74  * <li>{@code safeMax == Character.MAX_VALUE} 75  * <li>{@code unsafeReplacement == null} 76  * </ul> 77  * 78  * <p>For performance reasons escapers created by this builder are not Unicode aware and will not 79  * validate the well-formedness of their input. 80  */ 81  public static Builder builder() { 82  return new Builder(); 83  } 84  85  /** 86  * A builder for simple, fast escapers. 87  * 88  * <p>Typically an escaper needs to deal with the escaping of high valued characters or code 89  * points. In these cases it is necessary to extend either {@link ArrayBasedCharEscaper} or {@link 90  * ArrayBasedUnicodeEscaper} to provide the desired behavior. However this builder is suitable for 91  * creating escapers that replace a relative small set of characters. 92  * 93  * @author David Beaumont 94  * @since 15.0 95  */ 96  @Beta 97  public static final class Builder { 98  private final Map<Character, String> replacementMap = new HashMap<>(); 99  private char safeMin = Character.MIN_VALUE; 100  private char safeMax = Character.MAX_VALUE; 101  @CheckForNull private String unsafeReplacement = null; 102  103  // The constructor is exposed via the builder() method above. 104  private Builder() {} 105  106  /** 107  * Sets the safe range of characters for the escaper. Characters in this range that have no 108  * explicit replacement are considered 'safe' and remain unescaped in the output. If {@code 109  * safeMax < safeMin} then the safe range is empty. 110  * 111  * @param safeMin the lowest 'safe' character 112  * @param safeMax the highest 'safe' character 113  * @return the builder instance 114  */ 115  @CanIgnoreReturnValue 116  public Builder setSafeRange(char safeMin, char safeMax) { 117  this.safeMin = safeMin; 118  this.safeMax = safeMax; 119  return this; 120  } 121  122  /** 123  * Sets the replacement string for any characters outside the 'safe' range that have no explicit 124  * replacement. If {@code unsafeReplacement} is {@code null} then no replacement will occur, if 125  * it is {@code ""} then the unsafe characters are removed from the output. 126  * 127  * @param unsafeReplacement the string to replace unsafe characters 128  * @return the builder instance 129  */ 130  @CanIgnoreReturnValue 131  public Builder setUnsafeReplacement(@Nullable String unsafeReplacement) { 132  this.unsafeReplacement = unsafeReplacement; 133  return this; 134  } 135  136  /** 137  * Adds a replacement string for the given input character. The specified character will be 138  * replaced by the given string whenever it occurs in the input, irrespective of whether it lies 139  * inside or outside the 'safe' range. 140  * 141  * @param c the character to be replaced 142  * @param replacement the string to replace the given character 143  * @return the builder instance 144  * @throws NullPointerException if {@code replacement} is null 145  */ 146  @CanIgnoreReturnValue 147  public Builder addEscape(char c, String replacement) { 148  checkNotNull(replacement); 149  // This can replace an existing character (the builder is re-usable). 150  replacementMap.put(c, replacement); 151  return this; 152  } 153  154  /** Returns a new escaper based on the current state of the builder. */ 155  public Escaper build() { 156  return new ArrayBasedCharEscaper(replacementMap, safeMin, safeMax) { 157  @CheckForNull 158  private final char[] replacementChars = 159  unsafeReplacement != null ? unsafeReplacement.toCharArray() : null; 160  161  @Override 162  @CheckForNull 163  protected char[] escapeUnsafe(char c) { 164  return replacementChars; 165  } 166  }; 167  } 168  } 169  170  /** 171  * Returns a {@link UnicodeEscaper} equivalent to the given escaper instance. If the escaper is 172  * already a UnicodeEscaper then it is simply returned, otherwise it is wrapped in a 173  * UnicodeEscaper. 174  * 175  * <p>When a {@link CharEscaper} escaper is wrapped by this method it acquires extra behavior with 176  * respect to the well-formedness of Unicode character sequences and will throw {@link 177  * IllegalArgumentException} when given bad input. 178  * 179  * @param escaper the instance to be wrapped 180  * @return a UnicodeEscaper with the same behavior as the given instance 181  * @throws NullPointerException if escaper is null 182  * @throws IllegalArgumentException if escaper is not a UnicodeEscaper or a CharEscaper 183  */ 184  static UnicodeEscaper asUnicodeEscaper(Escaper escaper) { 185  checkNotNull(escaper); 186  if (escaper instanceof UnicodeEscaper) { 187  return (UnicodeEscaper) escaper; 188  } else if (escaper instanceof CharEscaper) { 189  return wrap((CharEscaper) escaper); 190  } 191  // In practice this shouldn't happen because it would be very odd not to 192  // extend either CharEscaper or UnicodeEscaper for non trivial cases. 193  throw new IllegalArgumentException( 194  "Cannot create a UnicodeEscaper from: " + escaper.getClass().getName()); 195  } 196  197  /** 198  * Returns a string that would replace the given character in the specified escaper, or {@code 199  * null} if no replacement should be made. This method is intended for use in tests through the 200  * {@code EscaperAsserts} class; production users of {@link CharEscaper} should limit themselves 201  * to its public interface. 202  * 203  * @param c the character to escape if necessary 204  * @return the replacement string, or {@code null} if no escaping was needed 205  */ 206  @CheckForNull 207  public static String computeReplacement(CharEscaper escaper, char c) { 208  return stringOrNull(escaper.escape(c)); 209  } 210  211  /** 212  * Returns a string that would replace the given character in the specified escaper, or {@code 213  * null} if no replacement should be made. This method is intended for use in tests through the 214  * {@code EscaperAsserts} class; production users of {@link UnicodeEscaper} should limit 215  * themselves to its public interface. 216  * 217  * @param cp the Unicode code point to escape if necessary 218  * @return the replacement string, or {@code null} if no escaping was needed 219  */ 220  @CheckForNull 221  public static String computeReplacement(UnicodeEscaper escaper, int cp) { 222  return stringOrNull(escaper.escape(cp)); 223  } 224  225  @CheckForNull 226  private static String stringOrNull(@CheckForNull char[] in) { 227  return (in == null) ? null : new String(in); 228  } 229  230  /** Private helper to wrap a CharEscaper as a UnicodeEscaper. */ 231  private static UnicodeEscaper wrap(final CharEscaper escaper) { 232  return new UnicodeEscaper() { 233  @Override 234  @CheckForNull 235  protected char[] escape(int cp) { 236  // If a code point maps to a single character, just escape that. 237  if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 238  return escaper.escape((char) cp); 239  } 240  // Convert the code point to a surrogate pair and escape them both. 241  // Note: This code path is horribly slow and typically allocates 4 new 242  // char[] each time it is invoked. However this avoids any 243  // synchronization issues and makes the escaper thread safe. 244  char[] surrogateChars = new char[2]; 245  Character.toChars(cp, surrogateChars, 0); 246  char[] hiChars = escaper.escape(surrogateChars[0]); 247  char[] loChars = escaper.escape(surrogateChars[1]); 248  249  // If either hiChars or lowChars are non-null, the CharEscaper is trying 250  // to escape the characters of a surrogate pair separately. This is 251  // uncommon and applies only to escapers that assume UCS-2 rather than 252  // UTF-16. See: http://en.wikipedia.org/wiki/UTF-16/UCS-2 253  if (hiChars == null && loChars == null) { 254  // We expect this to be the common code path for most escapers. 255  return null; 256  } 257  // Combine the characters and/or escaped sequences into a single array. 258  int hiCount = hiChars != null ? hiChars.length : 1; 259  int loCount = loChars != null ? loChars.length : 1; 260  char[] output = new char[hiCount + loCount]; 261  if (hiChars != null) { 262  // TODO: Is this faster than System.arraycopy() for small arrays? 263  for (int n = 0; n < hiChars.length; ++n) { 264  output[n] = hiChars[n]; 265  } 266  } else { 267  output[0] = surrogateChars[0]; 268  } 269  if (loChars != null) { 270  for (int n = 0; n < loChars.length; ++n) { 271  output[hiCount + n] = loChars[n]; 272  } 273  } else { 274  output[hiCount] = surrogateChars[1]; 275  } 276  return output; 277  } 278  }; 279  } 280 }