123 lines
4.7 KiB
Java
123 lines
4.7 KiB
Java
/*
|
|
* Copyright (C) 2008 The Guava Authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package com.google.common.escape;
|
|
|
|
import com.google.common.annotations.Beta;
|
|
import com.google.common.annotations.GwtCompatible;
|
|
import com.google.common.base.Function;
|
|
|
|
/**
|
|
* An object that converts literal text into a format safe for inclusion in a
|
|
* particular context (such as an XML document). Typically (but not always), the
|
|
* inverse process of "unescaping" the text is performed automatically by the
|
|
* relevant parser.
|
|
*
|
|
* <p>
|
|
* For example, an XML escaper would convert the literal string
|
|
* {@code "Foo<Bar>"} into {@code
|
|
* "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML
|
|
* tag. When the resulting XML document is parsed, the parser API will return
|
|
* this text as the original literal string {@code "Foo<Bar>"}.
|
|
*
|
|
* <p>
|
|
* An {@code Escaper} instance is required to be stateless, and safe when used
|
|
* concurrently by multiple threads.
|
|
*
|
|
* <p>
|
|
* Because, in general, escaping operates on the code points of a string and not
|
|
* on its individual {@code char} values, it is not safe to assume that
|
|
* {@code escape(s)} is equivalent to
|
|
* {@code escape(s.substring(0, n)) + escape(s.substring(n))} for arbitrary
|
|
* {@code n}. This is because of the possibility of splitting a surrogate pair.
|
|
* The only case in which it is safe to escape strings and concatenate the
|
|
* results is if you can rule out this possibility, either by splitting an
|
|
* existing long string into short strings adaptively around
|
|
* {@linkplain Character#isHighSurrogate surrogate}
|
|
* {@linkplain Character#isLowSurrogate pairs}, or by starting with short
|
|
* strings already known to be free of unpaired surrogates.
|
|
*
|
|
* <p>
|
|
* The two primary implementations of this interface are {@link CharEscaper} and
|
|
* {@link UnicodeEscaper}. They are heavily optimized for performance and
|
|
* greatly simplify the task of implementing new escapers. It is strongly
|
|
* recommended that when implementing a new escaper you extend one of these
|
|
* classes. If you find that you are unable to achieve the desired behavior
|
|
* using either of these classes, please contact the Java libraries team for
|
|
* advice.
|
|
*
|
|
* <p>
|
|
* Several popular escapers are defined as constants in classes like
|
|
* {@link com.google.common.html.HtmlEscapers},
|
|
* {@link com.google.common.xml.XmlEscapers}, and {@link SourceCodeEscapers}. To
|
|
* create your own escapers, use {@link CharEscaperBuilder}, or extend
|
|
* {@code CharEscaper} or {@code UnicodeEscaper}.
|
|
*
|
|
* @author David Beaumont
|
|
* @since 15.0
|
|
*/
|
|
@Beta
|
|
@GwtCompatible
|
|
public abstract class Escaper {
|
|
// TODO(user): evaluate custom implementations, considering package private
|
|
// constructor.
|
|
/** Constructor for use by subclasses. */
|
|
protected Escaper() {
|
|
}
|
|
|
|
/**
|
|
* Returns the escaped form of a given literal string.
|
|
*
|
|
* <p>
|
|
* Note that this method may treat input characters differently depending on the
|
|
* specific escaper implementation.
|
|
*
|
|
* <ul>
|
|
* <li>{@link UnicodeEscaper} handles
|
|
* <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a> correctly, including
|
|
* surrogate character pairs. If the input is badly formed the escaper should
|
|
* throw {@link IllegalArgumentException}.
|
|
* <li>{@link CharEscaper} handles Java characters independently and does not
|
|
* verify the input for well formed characters. A {@code CharEscaper} should not
|
|
* be used in situations where input is not guaranteed to be restricted to the
|
|
* Basic Multilingual Plane (BMP).
|
|
* </ul>
|
|
*
|
|
* @param string the literal string to be escaped
|
|
* @return the escaped form of {@code string}
|
|
* @throws NullPointerException if {@code string} is null
|
|
* @throws IllegalArgumentException if {@code string} contains badly formed
|
|
* UTF-16 or cannot be escaped for any other
|
|
* reason
|
|
*/
|
|
public abstract String escape(String string);
|
|
|
|
private final Function<String, String> asFunction = new Function<String, String>() {
|
|
@Override
|
|
public String apply(String from) {
|
|
return escape(from);
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Returns a {@link Function} that invokes {@link #escape(String)} on this
|
|
* escaper.
|
|
*/
|
|
public final Function<String, String> asFunction() {
|
|
return asFunction;
|
|
}
|
|
}
|