/* * Copyright (C) 2008 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.common.escape; import com.google.common.annotations.Beta; import com.google.common.annotations.GwtCompatible; import com.google.common.base.Function; /** * An object that converts literal text into a format safe for inclusion in a * particular context (such as an XML document). Typically (but not always), the * inverse process of "unescaping" the text is performed automatically by the * relevant parser. * *
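* For example, an XML escaper would convert the literal string
* {@code "Foo<Bar>"} into {@code "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"}
* from being confused with an XML tag. When the resulting XML document is
* parsed, the parser API will return this text as the original literal string
* {@code "Foo<Bar>"}.
*
* <p>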
* An {@code Escaper} instance is required to be stateless, and safe when used
* concurrently by multiple threads.
*
* <p>
* Because, in general, escaping operates on the code points of a string and not
* on its individual {@code char} values, it is not safe to assume that
* {@code escape(s)} is equivalent to
* {@code escape(s.substring(0, n)) + escape(s.substring(n))} for arbitrary
* {@code n}. This is because of the possibility of splitting a surrogate pair.
* The only case in which it is safe to escape strings and concatenate the
* results is if you can rule out this possibility, either by splitting an
* existing long string into short strings adaptively around
* {@linkplain Character#isHighSurrogate surrogate}
* {@linkplain Character#isLowSurrogate pairs}, or by starting with short
* strings already known to be free of unpaired surrogates.
*
* <p>
* The two primary implementations of this interface are {@link CharEscaper} and
* {@link UnicodeEscaper}. They are heavily optimized for performance and
* greatly simplify the task of implementing new escapers. It is strongly
* recommended that when implementing a new escaper you extend one of these
* classes. If you find that you are unable to achieve the desired behavior
* using either of these classes, please contact the Java libraries team for
* advice.
*
* <p>
* Several popular escapers are defined as constants in classes like
* {@link com.google.common.html.HtmlEscapers},
* {@link com.google.common.xml.XmlEscapers}, and {@link SourceCodeEscapers}. To
* create your own escapers, use {@link CharEscaperBuilder}, or extend
* {@code CharEscaper} or {@code UnicodeEscaper}.
*
* @author David Beaumont
* @since 15.0
*/
@Beta
@GwtCompatible
public abstract class Escaper {
// TODO(user): evaluate custom implementations, considering package private
// constructor.
/**
 * Creates a new escaper instance. Declared {@code protected} because this
 * class is abstract and the constructor is intended for use by subclasses.
 */
protected Escaper() {}
/**
 * Returns the escaped form of a given literal string.
 *
 * <p>Note that this method may treat input characters differently depending
 * on the specific escaper implementation, so consult the documentation of the
 * concrete subclass in use for the exact handling of its input.
 *
 * @param string the literal string to be escaped
 * @return the escaped form of {@code string}
 * @throws NullPointerException if {@code string} is null
 * @throws IllegalArgumentException if {@code string} contains badly formed
 *     UTF-16 or cannot be escaped for any other reason
 */
public abstract String escape(String string);
private final Function