Coverage Summary for Class: ICU4Jv46TextNormalizer (com.acciente.oacc.normalizer.icu4j)
Class | Method, % | Line, % |
---|---|---|
ICU4Jv46TextNormalizer | 100% (6/ 6) | 100% (18/ 18) |
ICU4Jv46TextNormalizer$LazyInitSingletonHolder | 100% (2/ 2) | 100% (2/ 2) |
total | 100% (8/ 8) | 100% (20/ 20) |
1 /*
2 * Copyright 2009-2018, Acciente LLC
3 *
4 * Acciente LLC licenses this file to you under the
5 * Apache License, Version 2.0 (the "License"); you
6 * may not use this file except in compliance with the
7 * License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in
12 * writing, software distributed under the License is
13 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
14 * OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing
16 * permissions and limitations under the License.
17 */
18
19 package com.acciente.oacc.normalizer.icu4j;
20
21 import com.acciente.oacc.normalizer.TextNormalizer;
22 import com.ibm.icu.text.Normalizer2;
23
24 import java.nio.CharBuffer;
25
26 /**
27 * Why is there this specific implementation of the TextNormalizer for ICU4J version 4.6 and higher?
28 * -------------------------------------------------------------------------------------------------
29 * In ICU4J versions prior to version 4.6 the methods to normalize text were in a class named Normalizer.
30 * Starting with ICU4J version 4.6 there is a new class named Normalizer2 with the normalization methods,
31 * and the methods in the old Normalizer class delegate to the methods in the new Normalizer2 class.
32 *
33 * The legacy Normalizer class delegates to a method of the new Normalizer2 class that allocates an internal
34 * working StringBuilder that is not externally accessible and therefore not cleanable.
35 *
36 * This class directly uses the new Normalizer2 via the method does *not* allocate an internal working
37 * StringBuilder, it also takes precautions to minimize the chance of the Normalizer2 leaking an
38 * inaccessible/non-cleanable buffer containing the source text (for details see note below about
39 * "Using ICU4J to ensure cleanable passwords").
40 */
41 public class ICU4Jv46TextNormalizer extends TextNormalizer {
42 // constants
43 private static final char ZERO_CHAR = '\0';
44
45 // state
46 private final Normalizer2 nfcNormalizer;
47
48 // we use the singleton holder pattern to lazy initialize the singleton instance
49 // in a thread safe manner without the need for any explicit locking
50 // (see https://en.wikipedia.org/wiki/Initialization-on-demand_holder_idiom).
51 private static class LazyInitSingletonHolder {
52 private static final TextNormalizer INSTANCE = new ICU4Jv46TextNormalizer();
53 }
54
55 private ICU4Jv46TextNormalizer() {
56 nfcNormalizer = Normalizer2Factory.getNFCInstance();
57 }
58
59 public static TextNormalizer getInstance() {
60 return LazyInitSingletonHolder.INSTANCE;
61 }
62
63 @Override
64 public char[] normalizeToNfc(char[] charArraySource) {
65 /*
66 * Using ICU4J to ensure cleanable passwords
67 * -----------------------------------------
68 * Using ICU4J, without requisite precautions, does not ensure that the contents of the source
69 * char sequence is not copied to a sequence that not accessible to the caller -- and therefore
70 * not cleanable. The following two precautions needed are:
71 *
72 * 1) Only use the Normalizer2#normalize(CharSequence, StringBuilder) method. While it is
73 * clear that the Normalizer2#normalize(CharSequence) method should not be used (since it
74 * returns an immutable string) it turns out that we also need to avoid using the
75 * Normalizer2#normalize(CharSequence, Appendable) method since it allocates an internal
76 * StringBuilder instance for intermediate processing. In contrast, the
77 * Normalizer2#normalize(CharSequence, StringBuilder) method uses the caller-provided
78 * StringBuilder for the intermediate processing, which now takes us to the next precaution
79 * needed.
80 *
81 * 2) When using the {@link Normalizer2#normalize(CharSequence, StringBuilder)} method, if the
82 * destination StringBuilder does not have sufficient capacity and is automatically expanded, then
83 * we cause a non-cleanable char array with the partial contents to be "leaked" (i.e. we have no
84 * access to this char array). This is because to increase its capacity the StringBuilder allocates
85 * a new char array buffer, and releases its reference to the old buffer with its contents intact.
86 * To prevent this we need to allocate a destination StringBuilder with enough capacity to handle
87 * the maximum expansion that can occur during NFC normalization. How much capacity do we need to
88 * allocate?
89 *
90 * According to (http://unicode.org/faq/normalization.html#12) the worst expansion for NFC is 3x. In
91 * tests of the ICU4J implementation, using the Unicode characters that cause the worst case expansion,
92 * it was verified that if the destination StringBuilder has a 3x initial capacity then the
93 * StringBuilder capacity does not increase (see ICU4JNormalizer2DestBufferWorstCaseExpansionTest).
94 */
95 final StringBuilder stringBuilderDest = new StringBuilder(3 * charArraySource.length);
96 nfcNormalizer.normalize(CharBuffer.wrap(charArraySource), stringBuilderDest);
97
98 // copy the result out of the StringBuilder, before clearing the character array buffer backing the StringBuilder
99 final char[] charArrayDest = copyContents(stringBuilderDest);
100
101 // zero out contents of the character array backing the StringBuilder
102 zeroOut(stringBuilderDest);
103
104 return charArrayDest;
105 }
106
107 /**
108 * Returns a copy of the contents of specified string builder.
109 *
110 * @param source
111 * @return a character array
112 */
113 private char[] copyContents(StringBuilder source) {
114 final char[] copy = new char[source.length()];
115 source.getChars(0, copy.length, copy, 0);
116 return copy;
117 }
118
119 /**
120 * Sets all contents in the specified string builder to {@value ZERO_CHAR}.
121 *
122 * @param dest the StringBuilder to zero out
123 */
124 private void zeroOut(StringBuilder dest) {
125 dest.setLength(dest.capacity());
126 for (int i = 0; i < dest.length(); i++) {
127 dest.setCharAt(i, ZERO_CHAR);
128 }
129 dest.setLength(0);
130 }
131 }