Coverage Summary for Class: TextNormalizer (com.acciente.oacc.normalizer)

Class	Class, %	Method, %	Line, %
TextNormalizer	100% (1/ 1)	100% (2/ 2)	33.3% (2/ 6)
1 /*
2  * Copyright 2009-2018, Acciente LLC
3  *
4  * Acciente LLC licenses this file to you under the
5  * Apache License, Version 2.0 (the "License"); you
6  * may not use this file except in compliance with the
7  * License. You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in
12  * writing, software distributed under the License is
13  * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
14  * OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing
16  * permissions and limitations under the License.
17  */
18 
19 package com.acciente.oacc.normalizer;
20 
21 import com.acciente.oacc.normalizer.icu4j.ICU4Jv26TextNormalizer;
22 import com.acciente.oacc.normalizer.icu4j.ICU4Jv46TextNormalizer;
23 import com.acciente.oacc.normalizer.jdk.JDKTextNormalizer;
24 
25 /**
26  * Normalizes Unicode text to handle characters that have more than one canonically equivalent representation.
27  * <p>
28  * This is important when comparing hashed passwords because plaintext that looks visually identical might actually
29  * have different binary representations, without the user being aware. For example, <code>é</code> (the letter
30  * <code>e</code> with an acute accent) may be represented as a single Unicode character (U+00E9) or composed of two
31  * characters (U+0065 + U+0301), but both representations are canonically equivalent.
32  * <p>
33  * This class first tries to use the ICU4J library for normalization because it normalizes character arrays
34  * without converting to <code>String</code>. If ICU4J is not available, then it falls back to the text normalizer
35  * provided by the JDK, which produces an <strong>intermediate <code>String</code> representation</strong> of the text.
36  * <p>
37  * In other words, if you need to prevent a cleanable <code>char[]</code> password from being turned into a temporary
38  * <code>String</code> during Unicode character normalization, you need to include a dependency on ICU4J.
39  */
40 public abstract class TextNormalizer {
41    /**
42     * Gets an instance of a text normalizer.
43     * <p>
44     * If the ICU4J library is available, the returned instance will use an ICU4J normalizer, which handles character
45     * arrays without converting to <code>String</code>. Otherwise (if ICU4J is not available), the fallback instance
46     * returned uses the normalizer provided by the JDK, which produces an <strong>intermediate <code>String</code>
47     * representation</strong> of the normalized text.
48     *
49     * @return a text normalizer instance, tried in this order: {@link ICU4Jv46TextNormalizer}, {@link ICU4Jv26TextNormalizer}, {@link JDKTextNormalizer}
50     */
51    public static TextNormalizer getInstance() {
52       try {
53          // first see if a newer version of ICU4J is available
54          return ICU4Jv46TextNormalizer.getInstance();
55       }
56       catch (NoClassDefFoundError e1) { // a class needed by the newer ICU4J-based implementation could not be loaded
57          try {
58             // next see if an older version of ICU4J is available
59             return ICU4Jv26TextNormalizer.getInstance();
60          }
61          catch (NoClassDefFoundError e2) { // a class needed by the older ICU4J-based implementation could not be loaded either
62             // otherwise fall back to the non-cleanable JDK-based implementation
63             return JDKTextNormalizer.getInstance();
64          }
65       }
66    }
67 
68    /**
69     * Returns the canonically equivalent normalized (NFC) version of a Unicode character array.
70     * <p>
71     * Note:
72     * If the ICU4J library for normalization is not available, the fallback normalizer provided by the JDK
73     * will produce an intermediate <code>String</code> representation of the normalized text!
74     *
75     * @param source any Unicode text
76     * @return a character array containing the normalized representation of the source text
77     */
78    public abstract char[] normalizeToNfc(char[] source);
79 }