Update #48 - Added some features from OptiFine

2025-06-28 02:48:14 -05:00 · 2025-01-24 18:39:36 -08:00
parent 1f0d593a8c
commit e83a912e38
1056 changed files with 17706 additions and 898 deletions
--- a/sources/main/java/jdk_internal/icu/text/BidiBase.java
+++ b/sources/main/java/jdk_internal/icu/text/BidiBase.java
--- a/sources/main/java/jdk_internal/icu/text/BidiLine.java
+++ b/sources/main/java/jdk_internal/icu/text/BidiLine.java
@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+*******************************************************************************
+*   Copyright (C) 2001-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*/
+/* Written by Simon Montagu, Matitiahu Allouche
+ * (ported from C code written by Markus W. Scherer)
+ */
+
+package jdk_internal.icu.text;
+
+import java.util.Arrays;
+
+import jdk_internal.bidi.Bidi;
+
+final class BidiLine {
+
+	/*
+	 * General remarks about the functions in this file:
+	 *
+	 * These functions deal with the aspects of potentially mixed-directional text
+	 * in a single paragraph or in a line of a single paragraph which has already
+	 * been processed according to the Unicode 3.0 Bidi algorithm as defined in <a
+	 * href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9: Unicode
+	 * Bidirectional Algorithm</a>, version 13, also described in The Unicode
+	 * Standard, Version 4.0.1 .
+	 *
+	 * This means that there is a Bidi object with a levels and a dirProps array.
+	 * paraLevel and direction are also set. Only if the length of the text is zero,
+	 * then levels==dirProps==NULL.
+	 *
+	 * The overall directionality of the paragraph or line is used to bypass the
+	 * reordering steps if possible. Even purely RTL text does not need reordering
+	 * there because the getLogical/VisualIndex() methods can compute the index on
+	 * the fly in such a case.
+	 *
+	 * The implementation of the access to same-level-runs and of the reordering do
+	 * attempt to provide better performance and less memory usage compared to a
+	 * direct implementation of especially rule (L2) with an array of one (32-bit)
+	 * integer per text character.
+	 *
+	 * Here, the levels array is scanned as soon as necessary, and a vector of
+	 * same-level-runs is created. Reordering then is done on this vector. For each
+	 * run of text positions that were resolved to the same level, only 8 bytes are
+	 * stored: the first text position of the run and the visual position behind the
+	 * run after reordering. One sign bit is used to hold the directionality of the
+	 * run. This is inefficient if there are many very short runs. If the average
+	 * run length is <2, then this uses more memory.
+	 *
+	 * In a further attempt to save memory, the levels array is never changed after
+	 * all the resolution rules (Xn, Wn, Nn, In). Many methods have to consider the
+	 * field trailingWSStart: if it is less than length, then there is an implicit
+	 * trailing run at the paraLevel, which is not reflected in the levels array.
+	 * This allows a line Bidi object to use the same levels array as its paragraph
+	 * parent object.
+	 *
+	 * When a Bidi object is created for a line of a paragraph, then the paragraph's
+	 * levels and dirProps arrays are reused by way of setting a pointer into them,
+	 * not by copying. This again saves memory and forbids to change the now shared
+	 * levels for (L1).
+	 */
+
+	/* handle trailing WS (L1) -------------------------------------------------- */
+
+	/*
+	 * setTrailingWSStart() sets the start index for a trailing run of WS in the
+	 * line. This is necessary because we do not modify the paragraph's levels array
+	 * that we just point into. Using trailingWSStart is another form of performing
+	 * (L1).
+	 *
+	 * To make subsequent operations easier, we also include the run before the WS
+	 * if it is at the paraLevel - we merge the two here.
+	 *
+	 * This method is called only from setLine(), so paraLevel is set correctly for
+	 * the line even when contextual multiple paragraphs.
+	 */
+
+	/**
+	 * Records in {@code bidiBase.trailingWSStart} where the trailing run that is
+	 * implicitly at the paragraph level starts (rule L1).
+	 *
+	 * The scan walks backwards from the end of the text, first across characters
+	 * whose dirProp flag is in {@code BidiBase.MASK_WS}, then across characters
+	 * whose level already equals the paragraph level, so that a preceding
+	 * paraLevel run is merged into the trailing run. Text that ends with a block
+	 * separator ({@code BidiBase.B}) gets {@code trailingWSStart == length}.
+	 *
+	 * An empty text (length 0) yields {@code trailingWSStart == 0}; it no longer
+	 * reads {@code dirProps[-1]}.
+	 *
+	 * @param bidiBase the object whose dirProps, levels, length and paraLevel are
+	 *                 read and whose trailingWSStart is written
+	 */
+	static void setTrailingWSStart(BidiBase bidiBase) {
+		byte[] dirProps = bidiBase.dirProps;
+		byte[] levels = bidiBase.levels;
+		int start = bidiBase.length;
+		byte paraLevel = bidiBase.paraLevel;
+
+		/*
+		 * If the line is terminated by a block separator, all preceding WS etc... are
+		 * already set to paragraph level. Setting trailingWSStart to pBidi->length will
+		 * avoid changing the level of B chars from 0 to paraLevel in getLevels when
+		 * orderParagraphsLTR==TRUE
+		 *
+		 * The start > 0 test keeps an empty text from indexing dirProps[-1].
+		 */
+		if (start > 0 && dirProps[start - 1] == BidiBase.B) {
+			bidiBase.trailingWSStart = start; /* currently == bidiBase.length */
+			return;
+		}
+		/* go backwards across all WS, BN, explicit codes */
+		while (start > 0 && (BidiBase.DirPropFlag(dirProps[start - 1]) & BidiBase.MASK_WS) != 0) {
+			--start;
+		}
+
+		/* if the WS run can be merged with the previous run then do so here */
+		while (start > 0 && levels[start - 1] == paraLevel) {
+			--start;
+		}
+
+		bidiBase.trailingWSStart = start;
+	}
+
+	/**
+	 * Fills {@code lineBidi} with the state of the line {@code [start, limit)} of
+	 * {@code paraBidi}.
+	 *
+	 * The text, dirProps and levels of the range are copied into arrays held by
+	 * {@code lineBidi}; paraCount, reorderingMode and reorderingOptions are taken
+	 * over from the parent, and paraLevel is the parent's paragraph level at
+	 * {@code start}. controlCount, resultLength, trailingWSStart and direction are
+	 * computed for the line. runCount is set to -1, which makes getRuns() compute
+	 * the runs on its next call.
+	 *
+	 * @param paraBidi the paragraph object the line is cut from
+	 * @param newBidi  returned as is; this method neither reads nor modifies it
+	 * @param lineBidi the object that receives the line state
+	 * @param start    index in paraBidi of the first character of the line
+	 * @param limit    index in paraBidi just behind the last character of the line
+	 * @return {@code newBidi}
+	 */
+	static Bidi setLine(BidiBase paraBidi, Bidi newBidi, BidiBase lineBidi, int start, int limit) {
+		int length;
+
+		/* set the values in lineBidi from its paraBidi parent */
+		/* class members are already initialized to 0 */
+		// lineBidi.paraBidi = null; /* mark unfinished setLine */
+		// lineBidi.flags = 0;
+		// lineBidi.controlCount = 0;
+
+		length = lineBidi.length = lineBidi.originalLength = lineBidi.resultLength = limit - start;
+
+		lineBidi.text = new char[length];
+		System.arraycopy(paraBidi.text, start, lineBidi.text, 0, length);
+		lineBidi.paraLevel = paraBidi.GetParaLevelAt(start);
+		lineBidi.paraCount = paraBidi.paraCount;
+		lineBidi.runs = new BidiRun[0];
+		lineBidi.reorderingMode = paraBidi.reorderingMode;
+		lineBidi.reorderingOptions = paraBidi.reorderingOptions;
+		/* count the Bidi controls inside the line; they do not count towards resultLength */
+		if (paraBidi.controlCount > 0) {
+			int j;
+			for (j = start; j < limit; j++) {
+				if (BidiBase.IsBidiControlChar(paraBidi.text[j])) {
+					lineBidi.controlCount++;
+				}
+			}
+			lineBidi.resultLength -= lineBidi.controlCount;
+		}
+		/* copy proper subset of DirProps */
+		lineBidi.getDirPropsMemory(length);
+		lineBidi.dirProps = lineBidi.dirPropsMemory;
+		System.arraycopy(paraBidi.dirProps, start, lineBidi.dirProps, 0, length);
+		/* copy proper subset of Levels */
+		lineBidi.getLevelsMemory(length);
+		lineBidi.levels = lineBidi.levelsMemory;
+		System.arraycopy(paraBidi.levels, start, lineBidi.levels, 0, length);
+		/* a negative runCount makes getRuns() compute the runs on its next call */
+		lineBidi.runCount = -1;
+
+		if (paraBidi.direction != BidiBase.MIXED) {
+			/* the parent is already trivial */
+			lineBidi.direction = paraBidi.direction;
+
+			/*
+			 * The parent's levels are all either implicitly or explicitly ==paraLevel; do
+			 * the same here.
+			 */
+			/* translate the parent's trailingWSStart into line coordinates, clamped to [0, length] */
+			if (paraBidi.trailingWSStart <= start) {
+				lineBidi.trailingWSStart = 0;
+			} else if (paraBidi.trailingWSStart < limit) {
+				lineBidi.trailingWSStart = paraBidi.trailingWSStart - start;
+			} else {
+				lineBidi.trailingWSStart = length;
+			}
+		} else {
+			byte[] levels = lineBidi.levels;
+			int i, trailingWSStart;
+			byte level;
+
+			setTrailingWSStart(lineBidi);
+			trailingWSStart = lineBidi.trailingWSStart;
+
+			/* recalculate lineBidiBase.direction */
+			if (trailingWSStart == 0) {
+				/* all levels are at paraLevel */
+				lineBidi.direction = (byte) (lineBidi.paraLevel & 1);
+			} else {
+				/* get the level of the first character */
+				/* only the low bit is kept, so "level" is a direction (0 or 1) from here on */
+				level = (byte) (levels[0] & 1);
+
+				/*
+				 * if there is anything of a different level, then the line is mixed
+				 */
+				if (trailingWSStart < length && (lineBidi.paraLevel & 1) != level) {
+					/*
+					 * the trailing WS is at paraLevel, which differs from levels[0]
+					 */
+					lineBidi.direction = BidiBase.MIXED;
+				} else {
+					/*
+					 * see if levels[1..trailingWSStart-1] have the same direction as levels[0] and
+					 * paraLevel
+					 */
+					/*
+					 * trailingWSStart is not 0 in this branch, so the loop ends at
+					 * i == trailingWSStart at the latest
+					 */
+					for (i = 1;; i++) {
+						if (i == trailingWSStart) {
+							/* the direction values match those in level */
+							lineBidi.direction = level;
+							break;
+						} else if ((levels[i] & 1) != level) {
+							lineBidi.direction = BidiBase.MIXED;
+							break;
+						}
+					}
+				}
+			}
+
+			switch (lineBidi.direction) {
+			case Bidi.DIRECTION_LEFT_TO_RIGHT:
+				/* make sure paraLevel is even */
+				lineBidi.paraLevel = (byte) ((lineBidi.paraLevel + 1) & ~1);
+
+				/*
+				 * all levels are implicitly at paraLevel (important for getLevels())
+				 */
+				lineBidi.trailingWSStart = 0;
+				break;
+			case Bidi.DIRECTION_RIGHT_TO_LEFT:
+				/* make sure paraLevel is odd */
+				lineBidi.paraLevel |= 1;
+
+				/*
+				 * all levels are implicitly at paraLevel (important for getLevels())
+				 */
+				lineBidi.trailingWSStart = 0;
+				break;
+			default:
+				/* mixed line: keep paraLevel and trailingWSStart as computed above */
+				break;
+			}
+		}
+
+		lineBidi.paraBidi = paraBidi; /* mark successful setLine */
+
+		return newBidi;
+	}
+
+	/**
+	 * Returns the level of the character at {@code charIndex}.
+	 *
+	 * Only a mixed-direction text stores a meaningful per-character level, and
+	 * only for positions before {@code trailingWSStart}; every other position is
+	 * answered with the paragraph level at that index.
+	 *
+	 * @param bidiBase  the object to query
+	 * @param charIndex index of the character within the text
+	 * @return the level taken from {@code levels[]} or the paragraph level
+	 */
+	static byte getLevelAt(BidiBase bidiBase, int charIndex) {
+		boolean hasOwnLevel = bidiBase.direction == BidiBase.MIXED && charIndex < bidiBase.trailingWSStart;
+		if (hasOwnLevel) {
+			return bidiBase.levels[charIndex];
+		}
+		/* trivial direction, or inside the implicit trailing WS run */
+		return bidiBase.GetParaLevelAt(charIndex);
+	}
+
+	/**
+	 * Returns the levels of the text with the implicit trailing WS run written
+	 * out.
+	 *
+	 * Side effect: when {@code trailingWSStart != length}, the entries
+	 * {@code levels[trailingWSStart..length-1]} are overwritten with the paragraph
+	 * level and {@code trailingWSStart} is moved to {@code length}, so a second
+	 * call finds nothing left to fill.
+	 *
+	 * @param bidiBase the object whose levels are requested
+	 * @return a copy of the first {@code length} entries when the internal array is
+	 *         longer than the text, otherwise the internal array itself
+	 */
+	static byte[] getLevels(BidiBase bidiBase) {
+		final int textLength = bidiBase.length;
+		final int wsStart = bidiBase.trailingWSStart;
+
+		if (wsStart != textLength) {
+			/*
+			 * The trailing WS run is only implied so far; make the array reflect it.
+			 * bidiBase.paraLevel is ok even if contextual multiple paragraphs, since
+			 * bidiBase is a line object.
+			 */
+			Arrays.fill(bidiBase.levels, wsStart, textLength, bidiBase.paraLevel);
+			bidiBase.trailingWSStart = textLength;
+		}
+
+		byte[] stored = bidiBase.levels;
+		/* the backing array may be larger than the text; hand out exactly length entries */
+		return (textLength < stored.length) ? Arrays.copyOf(stored, textLength) : stored;
+	}
+
+	/**
+	 * Builds a stand-alone description of the run at position {@code runIndex} of
+	 * {@code bidiBase.runs}.
+	 *
+	 * In the runs array {@code limit} is cumulative, so the length of a run is the
+	 * difference to the limit of the run before it. The returned run has the same
+	 * {@code start} and {@code level}, and its {@code limit} is
+	 * {@code start + length}.
+	 *
+	 * @param bidiBase the object holding the runs array
+	 * @param runIndex index into {@code bidiBase.runs}
+	 * @return a new BidiRun; the entry in the runs array is not modified
+	 */
+	static BidiRun getVisualRun(BidiBase bidiBase, int runIndex) {
+		BidiRun[] runs = bidiBase.runs;
+		BidiRun run = runs[runIndex];
+
+		/* the first run has no predecessor, its cumulative limit is its length */
+		int precedingLimit = (runIndex > 0) ? runs[runIndex - 1].limit : 0;
+		int runLength = run.limit - precedingLimit;
+
+		return new BidiRun(run.start, run.start + runLength, run.level);
+	}
+
+	/* in trivial cases there is only one trivial run; called by getRuns() */
+	/**
+	 * Installs a single run that covers the whole text.
+	 *
+	 * {@code bidiBase.runs} is pointed at {@code bidiBase.simpleRuns}, runCount
+	 * becomes 1 and slot 0 receives a run from 0 to {@code bidiBase.length} at the
+	 * given level.
+	 *
+	 * @param bidiBase the object whose runs are set
+	 * @param level    level of the single run
+	 */
+	private static void getSingleRun(BidiBase bidiBase, byte level) {
+		BidiRun wholeText = new BidiRun(0, bidiBase.length, level);
+
+		bidiBase.runCount = 1;
+		bidiBase.runs = bidiBase.simpleRuns;
+		bidiBase.runs[0] = wholeText;
+	}
+
+	/* reorder the runs array (L2) ---------------------------------------------- */
+
+	/*
+	 * Reorder the same-level runs in the runs array. Here, runCount>1 and
+	 * maxLevel>=minLevel>=paraLevel. All the visualStart fields=logical start
+	 * before reordering. The "odd" bits are not set yet.
+	 *
+	 * Reordering with this data structure lends itself to some handy shortcuts:
+	 *
+	 * Since each run is moved but not modified, and since at the initial maxLevel
+	 * each sequence of same-level runs consists of only one run each, we don't need
+	 * to do anything there and can predecrement maxLevel. In many simple cases, the
+	 * reordering is thus done entirely in the index mapping. Also, reordering
+	 * occurs only down to the lowest odd level that occurs, which is minLevel|1.
+	 * However, if the lowest level itself is odd, then in the last reordering the
+	 * sequence of the runs at this level or higher will be all runs, and we don't
+	 * need the elaborate loop to search for them. This is covered by ++minLevel
+	 * instead of minLevel|=1 followed by an extra reorder-all after the
+	 * reorder-some loop. About a trailing WS run: Such a run would need special
+	 * treatment because its level is not reflected in levels[] if this is not a
+	 * paragraph object. Instead, all characters from trailingWSStart on are
+	 * implicitly at paraLevel. However, for all maxLevel>paraLevel, this run will
+	 * never be reordered and does not need to be taken into account.
+	 * maxLevel==paraLevel is only reordered if minLevel==paraLevel is odd, which is
+	 * done in the extra segment. This means that for the main reordering loop we
+	 * don't need to consider this run and can --runCount. If it is later part of
+	 * the all-runs reordering, then runCount is adjusted accordingly.
+	 */
+	/**
+	 * Reorders the entries of {@code bidiBase.runs} in place (rule L2).
+	 *
+	 * For each level from {@code maxLevel - 1} down to {@code minLevel + 1}, every
+	 * maximal sequence of adjacent runs whose level is at least that level is
+	 * reversed. If {@code minLevel} is odd, the whole runs array is reversed once
+	 * more at the end. The level of a run is read from
+	 * {@code bidiBase.levels[run.start]}. Only the order of the array entries
+	 * changes; the BidiRun objects themselves are not modified.
+	 *
+	 * @param bidiBase the object whose runs, levels, runCount, trailingWSStart and
+	 *                 length are read
+	 * @param minLevel lowest level among the runs, as computed by the caller
+	 * @param maxLevel highest level among the runs, as computed by the caller
+	 */
+	private static void reorderLine(BidiBase bidiBase, byte minLevel, byte maxLevel) {
+
+		/* nothing to do? */
+		if (maxLevel <= (minLevel | 1)) {
+			return;
+		}
+
+		BidiRun[] runs;
+		BidiRun tempRun;
+		byte[] levels;
+		int firstRun, endRun, limitRun, runCount;
+
+		/*
+		 * Reorder only down to the lowest odd level and reorder at an odd minLevel in a
+		 * separate, simpler loop. See comments above for why minLevel is always
+		 * incremented.
+		 */
+		++minLevel;
+
+		runs = bidiBase.runs;
+		levels = bidiBase.levels;
+		runCount = bidiBase.runCount;
+
+		/*
+		 * do not include the WS run at paraLevel<=old minLevel except in the simple
+		 * loop
+		 */
+		if (bidiBase.trailingWSStart < bidiBase.length) {
+			--runCount;
+		}
+
+		/* maxLevel is decremented before the first pass, so the topmost level is never reversed */
+		while (--maxLevel >= minLevel) {
+			firstRun = 0;
+
+			/* loop for all sequences of runs */
+			for (;;) {
+				/* look for a sequence of runs that are all at >=maxLevel */
+				/* look for the first run of such a sequence */
+				while (firstRun < runCount && levels[runs[firstRun].start] < maxLevel) {
+					++firstRun;
+				}
+				if (firstRun >= runCount) {
+					break; /* no more such runs */
+				}
+
+				/* look for the limit run of such a sequence (the run behind it) */
+				for (limitRun = firstRun; ++limitRun < runCount && levels[runs[limitRun].start] >= maxLevel;) {
+				}
+
+				/* Swap the entire sequence of runs from firstRun to limitRun-1. */
+				endRun = limitRun - 1;
+				while (firstRun < endRun) {
+					tempRun = runs[firstRun];
+					runs[firstRun] = runs[endRun];
+					runs[endRun] = tempRun;
+					++firstRun;
+					--endRun;
+				}
+
+				if (limitRun == runCount) {
+					break; /* no more such runs */
+				} else {
+					/* the run at limitRun is below maxLevel, so the search resumes behind it */
+					firstRun = limitRun + 1;
+				}
+			}
+		}
+
+		/* now do maxLevel==old minLevel (==odd!), see above */
+		/* minLevel was incremented above, so an even value here means the original one was odd */
+		if ((minLevel & 1) == 0) {
+			firstRun = 0;
+
+			/* include the trailing WS run in this complete reordering */
+			/*
+			 * From here on runCount is used as the index of the last run: it was already
+			 * decremented above when there is a trailing WS run, and is decremented now
+			 * when there is none.
+			 */
+			if (bidiBase.trailingWSStart == bidiBase.length) {
+				--runCount;
+			}
+
+			/* Swap the entire sequence of all runs. (endRun==runCount) */
+			while (firstRun < runCount) {
+				tempRun = runs[firstRun];
+				runs[firstRun] = runs[runCount];
+				runs[runCount] = tempRun;
+				++firstRun;
+				--runCount;
+			}
+		}
+	}
+
+	/* compute the runs array --------------------------------------------------- */
+
+	/**
+	 * Finds the run that contains the given logical position.
+	 *
+	 * The runs are visited in array order. The length of each run is the
+	 * difference between its cumulative {@code limit} and the limit of the run
+	 * before it; the run matches when {@code logicalIndex} lies in
+	 * {@code [start, start + length)}.
+	 *
+	 * @param bidiBase     the object holding runs and runCount
+	 * @param logicalIndex logical position to look up
+	 * @return index into {@code bidiBase.runs} of the containing run
+	 * @throws IllegalStateException if no run contains {@code logicalIndex}
+	 */
+	static int getRunFromLogicalIndex(BidiBase bidiBase, int logicalIndex) {
+		final BidiRun[] runs = bidiBase.runs;
+		final int runCount = bidiBase.runCount;
+		int precedingLimit = 0;
+
+		for (int runIndex = 0; runIndex < runCount; runIndex++) {
+			BidiRun run = runs[runIndex];
+			int runLength = run.limit - precedingLimit;
+			int logicalLimit = run.start + runLength;
+
+			if (run.start <= logicalIndex && logicalIndex < logicalLimit) {
+				return runIndex;
+			}
+			/* the cumulative limit of this run is where the next one starts visually */
+			precedingLimit += runLength;
+		}
+
+		/* we should never get here */
+		throw new IllegalStateException("Internal ICU error in getRunFromLogicalIndex");
+	}
+
+	/*
+	 * Compute the runs array from the levels array. After getRuns() returns,
+	 * runCount is guaranteed to be >0 and the runs are reordered. Odd-level runs
+	 * have visualStart on their visual right edge and they progress visually to the
+	 * left. If option OPTION_INSERT_MARKS is set, insertRemove will contain the sum
+	 * of appropriate LRM/RLM_BEFORE/AFTER flags. If option OPTION_REMOVE_CONTROLS
+	 * is set, insertRemove will contain the negative number of BiDi control
+	 * characters within this run.
+	 */
+	/**
+	 * Computes {@code bidiBase.runs} and {@code bidiBase.runCount} unless they are
+	 * already set ({@code runCount >= 0}), then fills the {@code insertRemove}
+	 * field of the runs from the insert points and the Bidi control characters.
+	 *
+	 * @param bidiBase the object whose levels, direction, trailingWSStart, text,
+	 *                 insertPoints and controlCount are read and whose runs and
+	 *                 runCount are written
+	 */
+	static void getRuns(BidiBase bidiBase) {
+		/*
+		 * This method returns immediately if the runs are already set. This includes
+		 * the case of length==0 (handled in setPara).
+		 */
+		if (bidiBase.runCount >= 0) {
+			return;
+		}
+		if (bidiBase.direction != BidiBase.MIXED) {
+			/* simple, single-run case - this covers length==0 */
+			/* bidiBase.paraLevel is ok even for contextual multiple paragraphs */
+			getSingleRun(bidiBase, bidiBase.paraLevel);
+		} else /* BidiBase.MIXED, length>0 */ {
+			/* mixed directionality */
+			int length = bidiBase.length, limit;
+			byte[] levels = bidiBase.levels;
+			int i, runCount;
+			byte level = -1; /* initialize with no valid level */
+			/*
+			 * If there are WS characters at the end of the line and the run preceding them
+			 * has a level different from paraLevel, then they will form their own run at
+			 * paraLevel (L1). Count them separately. We need some special treatment for
+			 * this in order to not modify the levels array which a line Bidi object shares
+			 * with its paragraph parent and its other line siblings. In other words, for
+			 * the trailing WS, it may be levels[]!=paraLevel but we have to treat it like
+			 * it were so.
+			 */
+			limit = bidiBase.trailingWSStart;
+			/* count the runs, there is at least one non-WS run, and limit>0 */
+			runCount = 0;
+			for (i = 0; i < limit; ++i) {
+				/* increment runCount at the start of each run */
+				if (levels[i] != level) {
+					++runCount;
+					level = levels[i];
+				}
+			}
+
+			/*
+			 * We don't need to see if the last run can be merged with a trailing WS run
+			 * because setTrailingWSStart() would have done that.
+			 */
+			if (runCount == 1 && limit == length) {
+				/* There is only one non-WS run and no trailing WS-run. */
+				getSingleRun(bidiBase, levels[0]);
+			} else /* runCount>1 || limit<length */ {
+				/* allocate and set the runs */
+				BidiRun[] runs;
+				int runIndex, start;
+				/* start minLevel above and maxLevel below every level, so the first run sets both */
+				byte minLevel = BidiBase.MAX_EXPLICIT_LEVEL + 1;
+				byte maxLevel = 0;
+
+				/* now, count a (non-mergeable) WS run */
+				if (limit < length) {
+					++runCount;
+				}
+
+				/* runCount > 1 */
+				bidiBase.getRunsMemory(runCount);
+				runs = bidiBase.runsMemory;
+
+				/* set the runs */
+				/*
+				 * FOOD FOR THOUGHT: this could be optimized, e.g.: 464->444, 484->444,
+				 * 575->555, 595->555 However, that would take longer. Check also how it would
+				 * interact with BiDi control removal and inserting Marks.
+				 */
+				runIndex = 0;
+
+				/*
+				 * search for the run limits and initialize visualLimit values with the run
+				 * lengths
+				 */
+				i = 0;
+				do {
+					/* prepare this run */
+					start = i;
+					level = levels[i];
+					if (level < minLevel) {
+						minLevel = level;
+					}
+					if (level > maxLevel) {
+						maxLevel = level;
+					}
+
+					/* look for the run limit */
+					while (++i < limit && levels[i] == level) {
+					}
+
+					/* i is another run limit */
+					/* the limit field holds the run length for now; it becomes cumulative below */
+					runs[runIndex] = new BidiRun(start, i - start, level);
+					++runIndex;
+				} while (i < limit);
+
+				if (limit < length) {
+					/* there is a separate WS run */
+					runs[runIndex] = new BidiRun(limit, length - limit, bidiBase.paraLevel);
+					/*
+					 * For the trailing WS run, bidiBase.paraLevel is ok even if contextual multiple
+					 * paragraphs.
+					 */
+					if (bidiBase.paraLevel < minLevel) {
+						minLevel = bidiBase.paraLevel;
+					}
+				}
+
+				/* set the object fields */
+				bidiBase.runs = runs;
+				bidiBase.runCount = runCount;
+
+				reorderLine(bidiBase, minLevel, maxLevel);
+
+				/* now add the direction flags and adjust the visualLimit's to be just that */
+				/* this loop will also handle the trailing WS run */
+				/* "limit" is reused here as the running sum of the run lengths in visual order */
+				limit = 0;
+				for (i = 0; i < runCount; ++i) {
+					runs[i].level = levels[runs[i].start];
+					limit = (runs[i].limit += limit);
+				}
+
+				/* Set the embedding level for the trailing WS run. */
+				/* For a RTL paragraph, it will be the *first* run in visual order. */
+				/*
+				 * For the trailing WS run, bidiBase.paraLevel is ok even if contextual multiple
+				 * paragraphs.
+				 */
+				/* runIndex counts the non-WS runs, so it is below runCount only if a WS run was added */
+				if (runIndex < runCount) {
+					int trailingRun = ((bidiBase.paraLevel & 1) != 0) ? 0 : runIndex;
+					runs[trailingRun].level = bidiBase.paraLevel;
+				}
+			}
+		}
+
+		/* handle insert LRM/RLM BEFORE/AFTER run */
+		if (bidiBase.insertPoints.size > 0) {
+			BidiBase.Point point;
+			int runIndex, ip;
+			for (ip = 0; ip < bidiBase.insertPoints.size; ip++) {
+				point = bidiBase.insertPoints.points[ip];
+				runIndex = getRunFromLogicalIndex(bidiBase, point.pos);
+				/* accumulate the flags of every insert point that falls into this run */
+				bidiBase.runs[runIndex].insertRemove |= point.flag;
+			}
+		}
+
+		/* handle remove BiDi control characters */
+		if (bidiBase.controlCount > 0) {
+			int runIndex, ic;
+			char c;
+			for (ic = 0; ic < bidiBase.length; ic++) {
+				c = bidiBase.text[ic];
+				if (BidiBase.IsBidiControlChar(c)) {
+					runIndex = getRunFromLogicalIndex(bidiBase, ic);
+					/* one decrement per control character found in the run */
+					bidiBase.runs[runIndex].insertRemove--;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Validates an array of levels, reports its lowest and highest level and
+	 * creates an identity index map of the same length.
+	 *
+	 * @param levels    the levels to inspect
+	 * @param pMinLevel out-parameter; element 0 receives the lowest level
+	 * @param pMaxLevel out-parameter; element 0 receives the highest level
+	 * @return a new array with {@code indexMap[i] == i}, or {@code null} when
+	 *         {@code levels} is null or empty or contains a level outside
+	 *         {@code [0, BidiBase.MAX_EXPLICIT_LEVEL + 1]}; the out-parameters
+	 *         are left untouched in the {@code null} case
+	 */
+	static int[] prepareReorder(byte[] levels, byte[] pMinLevel, byte[] pMaxLevel) {
+		if (levels == null || levels.length <= 0) {
+			return null;
+		}
+
+		/* start with the extremes swapped so that the first valid level sets both */
+		byte lowest = BidiBase.MAX_EXPLICIT_LEVEL + 1;
+		byte highest = 0;
+		for (byte current : levels) {
+			if (current < 0 || current > (BidiBase.MAX_EXPLICIT_LEVEL + 1)) {
+				return null;
+			}
+			if (current < lowest) {
+				lowest = current;
+			}
+			if (current > highest) {
+				highest = current;
+			}
+		}
+		pMinLevel[0] = lowest;
+		pMaxLevel[0] = highest;
+
+		/* identity map: every position maps to itself until something is reordered */
+		int[] indexMap = new int[levels.length];
+		for (int position = 0; position < indexMap.length; position++) {
+			indexMap[position] = position;
+		}
+		return indexMap;
+	}
+
+	/**
+	 * Computes an index map for an array of levels by reversing index ranges.
+	 *
+	 * Starting from the identity map returned by prepareReorder(), for each level
+	 * from the highest level down to the lowest odd level, every maximal sequence
+	 * of positions whose level is at least that level has its map entries
+	 * reversed. The levels array itself is not modified.
+	 *
+	 * NOTE(review): presumably {@code indexMap[visualIndex] == logicalIndex}, as
+	 * for ICU's reorderVisual - confirm against the callers.
+	 *
+	 * @param levels the levels to reorder by
+	 * @return the index map, or {@code null} when prepareReorder() returns
+	 *         {@code null} for {@code levels}
+	 */
+	static int[] reorderVisual(byte[] levels) {
+		/* one-element arrays serve as out-parameters of prepareReorder() */
+		byte[] aMinLevel = new byte[1];
+		byte[] aMaxLevel = new byte[1];
+		int start, end, limit, temp;
+		byte minLevel, maxLevel;
+
+		int[] indexMap = prepareReorder(levels, aMinLevel, aMaxLevel);
+		if (indexMap == null) {
+			return null;
+		}
+
+		minLevel = aMinLevel[0];
+		maxLevel = aMaxLevel[0];
+
+		/* nothing to do? */
+		/* a single even level everywhere leaves the identity map as the result */
+		if (minLevel == maxLevel && (minLevel & 1) == 0) {
+			return indexMap;
+		}
+
+		/* reorder only down to the lowest odd level */
+		minLevel |= 1;
+
+		/* loop maxLevel..minLevel */
+		do {
+			start = 0;
+
+			/* loop for all sequences of levels to reorder at the current maxLevel */
+			for (;;) {
+				/* look for a sequence of levels that are all at >=maxLevel */
+				/* look for the first index of such a sequence */
+				while (start < levels.length && levels[start] < maxLevel) {
+					++start;
+				}
+				if (start >= levels.length) {
+					break; /* no more such runs */
+				}
+
+				/* look for the limit of such a sequence (the index behind it) */
+				for (limit = start; ++limit < levels.length && levels[limit] >= maxLevel;) {
+				}
+
+				/*
+				 * Swap the entire interval of indexes from start to limit-1. We don't need to
+				 * swap the levels for the purpose of this algorithm: the sequence of levels
+				 * that we look at does not move anyway.
+				 */
+				end = limit - 1;
+				while (start < end) {
+					temp = indexMap[start];
+					indexMap[start] = indexMap[end];
+					indexMap[end] = temp;
+
+					++start;
+					--end;
+				}
+
+				if (limit == levels.length) {
+					break; /* no more such sequences */
+				} else {
+					/* levels[limit] is below maxLevel, so the search resumes behind it */
+					start = limit + 1;
+				}
+			}
+		} while (--maxLevel >= minLevel);
+
+		return indexMap;
+	}
+
+	/**
+	 * Builds the visual-to-logical index map from {@code bidiBase.runs}.
+	 *
+	 * The map is first filled run by run: ascending logical indexes for even-level
+	 * runs, descending ones for odd-level runs. If insert points exist, the
+	 * entries are then moved towards the end to make room for one
+	 * {@code BidiBase.MAP_NOWHERE} entry per inserted mark; otherwise, if the text
+	 * contains Bidi controls, the entries of those controls are dropped and the
+	 * rest is moved towards the front.
+	 *
+	 * NOTE(review): runs and runCount are read as they are; the caller is
+	 * presumably expected to have called getRuns() first - confirm at call sites.
+	 *
+	 * @param bidiBase the object whose runs, runCount, length, resultLength,
+	 *                 insertPoints, controlCount and text are read
+	 * @return a map with {@code bidiBase.resultLength} entries
+	 */
+	static int[] getVisualMap(BidiBase bidiBase) {
+		/* fill a visual-to-logical index map using the runs[] */
+		BidiRun[] runs = bidiBase.runs;
+		int logicalStart, visualStart, visualLimit;
+		/* the working array needs room for the longer of the source text and the result */
+		int allocLength = bidiBase.length > bidiBase.resultLength ? bidiBase.length : bidiBase.resultLength;
+		int[] indexMap = new int[allocLength];
+
+		visualStart = 0;
+		int idx = 0;
+		for (int j = 0; j < bidiBase.runCount; ++j) {
+			logicalStart = runs[j].start;
+			visualLimit = runs[j].limit;
+			if (runs[j].isEvenRun()) {
+				do { /* LTR */
+					indexMap[idx++] = logicalStart++;
+				} while (++visualStart < visualLimit);
+			} else {
+				logicalStart += visualLimit - visualStart; /* logicalLimit */
+				do { /* RTL */
+					indexMap[idx++] = --logicalStart;
+				} while (++visualStart < visualLimit);
+			}
+			/* visualStart==visualLimit; */
+		}
+
+		if (bidiBase.insertPoints.size > 0) {
+			int markFound = 0, runCount = bidiBase.runCount;
+			int insertRemove, i, j, k;
+			runs = bidiBase.runs;
+			/* count all inserted marks */
+			for (i = 0; i < runCount; i++) {
+				insertRemove = runs[i].insertRemove;
+				if ((insertRemove & (BidiBase.LRM_BEFORE | BidiBase.RLM_BEFORE)) > 0) {
+					markFound++;
+				}
+				if ((insertRemove & (BidiBase.LRM_AFTER | BidiBase.RLM_AFTER)) > 0) {
+					markFound++;
+				}
+			}
+			/* move back indexes by number of preceding marks */
+			/*
+			 * k is the write position, filled from the end of the result; the runs are
+			 * walked last to first, and the walk stops once no mark is left to place
+			 */
+			k = bidiBase.resultLength;
+			for (i = runCount - 1; i >= 0 && markFound > 0; i--) {
+				insertRemove = runs[i].insertRemove;
+				if ((insertRemove & (BidiBase.LRM_AFTER | BidiBase.RLM_AFTER)) > 0) {
+					indexMap[--k] = BidiBase.MAP_NOWHERE;
+					markFound--;
+				}
+				visualStart = i > 0 ? runs[i - 1].limit : 0;
+				for (j = runs[i].limit - 1; j >= visualStart && markFound > 0; j--) {
+					indexMap[--k] = indexMap[j];
+				}
+				if ((insertRemove & (BidiBase.LRM_BEFORE | BidiBase.RLM_BEFORE)) > 0) {
+					indexMap[--k] = BidiBase.MAP_NOWHERE;
+					markFound--;
+				}
+			}
+		} else if (bidiBase.controlCount > 0) {
+			int runCount = bidiBase.runCount, logicalEnd;
+			int insertRemove, length, i, j, k, m;
+			char uchar;
+			boolean evenRun;
+			runs = bidiBase.runs;
+			visualStart = 0;
+			/* move forward indexes by number of preceding controls */
+			/* k is the write position; it trails visualStart once a control has been dropped */
+			k = 0;
+			for (i = 0; i < runCount; i++, visualStart += length) {
+				length = runs[i].limit - visualStart;
+				insertRemove = runs[i].insertRemove;
+				/* if no control found yet, nothing to do in this run */
+				if ((insertRemove == 0) && (k == visualStart)) {
+					k += length;
+					continue;
+				}
+				/* if no control in this run */
+				if (insertRemove == 0) {
+					visualLimit = runs[i].limit;
+					for (j = visualStart; j < visualLimit; j++) {
+						indexMap[k++] = indexMap[j];
+					}
+					continue;
+				}
+				/* the run contains controls: rebuild its entries from the text, skipping them */
+				logicalStart = runs[i].start;
+				evenRun = runs[i].isEvenRun();
+				logicalEnd = logicalStart + length - 1;
+				for (j = 0; j < length; j++) {
+					m = evenRun ? logicalStart + j : logicalEnd - j;
+					uchar = bidiBase.text[m];
+					if (!BidiBase.IsBidiControlChar(uchar)) {
+						indexMap[k++] = m;
+					}
+				}
+			}
+		}
+		if (allocLength == bidiBase.resultLength) {
+			return indexMap;
+		}
+		/* the working array was sized for the source text; trim it to the result length */
+		int[] newMap = new int[bidiBase.resultLength];
+		System.arraycopy(indexMap, 0, newMap, 0, bidiBase.resultLength);
+		return newMap;
+	}
+
+}
--- a/sources/main/java/jdk_internal/icu/text/BidiRun.java
+++ b/sources/main/java/jdk_internal/icu/text/BidiRun.java
@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+/* Written by Simon Montagu, Matitiahu Allouche
+ * (ported from C code written by Markus W. Scherer)
+ */
+
+package jdk_internal.icu.text;
+
+/**
+ * A BidiRun represents a sequence of characters at the same embedding level.
+ * The Bidi algorithm decomposes a piece of text into sequences of characters at
+ * the same embedding level, each such sequence is called a "run".
+ *
+ * <p>
+ * A BidiRun represents such a run by storing its essential properties, but does
+ * not duplicate the characters which form the run.
+ *
+ * <p>
+ * The &quot;limit&quot; of the run is the position just after the last
+ * character, i.e., one more than that position.
+ *
+ * <p>
+ * This class has no public constructor, and its members cannot be modified by
+ * users.
+ *
+ * @see com.ibm.icu.text.Bidi
+ */
+class BidiRun {
+
+	/* first logical position of the run */
+	int start;
+
+	/* last visual position of the run +1 */
+	int limit;
+
+	/*
+	 * if >0, flags for inserting LRM/RLM before/after run, if <0, count of bidi
+	 * controls within run
+	 */
+	int insertRemove;
+
+	/* embedding level of the run; an even level means left-to-right */
+	byte level;
+
+	/*
+	 * Creates a run with start, limit, level and insertRemove all 0.
+	 *
+	 * The meaning of start and limit depends on where a run comes from. For a
+	 * member of the runs array of a Bidi object, start is the first logical
+	 * position of the run in the source text and limit is one after the last
+	 * visual position of the run. For a run returned by getLogicalRun() or
+	 * getVisualRun(), start is the first logical position of the run in the source
+	 * text and limit is one after the last logical position of the run.
+	 */
+	BidiRun() {
+		/* every field keeps its default value of 0 */
+	}
+
+	/*
+	 * Creates a run from its start, limit and embedding level; insertRemove is 0.
+	 */
+	BidiRun(int start, int limit, byte embeddingLevel) {
+		this.level = embeddingLevel;
+		this.limit = limit;
+		this.start = start;
+	}
+
+	/*
+	 * Overwrites all four fields of this run with the values of another run.
+	 */
+	void copyFrom(BidiRun run) {
+		start = run.start;
+		limit = run.limit;
+		insertRemove = run.insertRemove;
+		level = run.level;
+	}
+
+	/**
+	 * Returns the embedding level of this run.
+	 */
+	byte getEmbeddingLevel() {
+		return this.level;
+	}
+
+	/**
+	 * Tells whether the level of this run is even.
+	 *
+	 * @return true if the lowest bit of the embedding level is clear, i.e. this is
+	 *         a left-to-right run.
+	 */
+	boolean isEvenRun() {
+		int lowestBit = level & 1;
+		return lowestBit != 1;
+	}
+
+}
--- a/sources/main/java/jdk_internal/icu/text/BidiWriter.java
+++ b/sources/main/java/jdk_internal/icu/text/BidiWriter.java
@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+*******************************************************************************
+*   Copyright (C) 2001-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*/
+/* Written by Simon Montagu, Matitiahu Allouche
+ * (ported from C code written by Markus W. Scherer)
+ */
+
+package jdk_internal.icu.text;
+
+import jdk_internal.icu.lang.UCharacter;
+
+final class BidiWriter {
+
+	/** Bidi control code points */
+	static final char LRM_CHAR = 0x200e;
+	static final char RLM_CHAR = 0x200f;
+
+	/** Bit mask combining the flag bits of the R and AL character directions. */
+	static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT | 1 << UCharacter.RIGHT_TO_LEFT_ARABIC);
+
+	/**
+	 * Tells whether a character type is one of the three mark categories
+	 * (non-spacing, combining spacing or enclosing).
+	 *
+	 * @param type character type, as obtained from UCharacter.getType()
+	 * @return true if the type is one of those mark categories
+	 */
+	private static boolean IsCombining(int type) {
+		return ((1 << type & (1 << UCharacter.NON_SPACING_MARK | 1 << UCharacter.COMBINING_SPACING_MARK
+				| 1 << UCharacter.ENCLOSING_MARK)) != 0);
+	}
+
+	/*
+	 * When we have OUTPUT_REVERSE set on writeReordered(), then we semantically
+	 * write RTL runs in reverse and later reverse them again. Instead, we actually
+	 * write them in forward order to begin with. However, if the RTL run was to be
+	 * mirrored, we need to mirror here now since the implicit second reversal must
+	 * not do it. It looks strange to do mirroring in LTR output, but it is only
+	 * because we are writing RTL output in reverse.
+	 *
+	 * Only the REMOVE_BIDI_CONTROLS and DO_MIRRORING bits of options are looked at.
+	 * An empty src is returned unchanged.
+	 */
+	private static String doWriteForward(String src, int options) {
+		/*
+		 * Nothing to copy for an empty run. The do-while loops below read the first
+		 * code unit before testing the length and would throw on empty input.
+		 */
+		if (src.length() == 0) {
+			return src;
+		}
+
+		/* optimize for several combinations of options */
+		switch (options & (BidiBase.REMOVE_BIDI_CONTROLS | BidiBase.DO_MIRRORING)) {
+		case 0: {
+			/* simply return the LTR run */
+			return src;
+		}
+		case BidiBase.DO_MIRRORING: {
+			StringBuffer dest = new StringBuffer(src.length());
+
+			/* do mirroring, one code point at a time */
+			int i = 0;
+			int c;
+
+			do {
+				c = UTF16.charAt(src, i);
+				i += UTF16.getCharCount(c);
+				UTF16.append(dest, UCharacter.getMirror(c));
+			} while (i < src.length());
+			return dest.toString();
+		}
+		case BidiBase.REMOVE_BIDI_CONTROLS: {
+			StringBuilder dest = new StringBuilder(src.length());
+
+			/* copy the LTR run and remove any Bidi control characters */
+			int i = 0;
+			char c;
+			do {
+				c = src.charAt(i++);
+				if (!BidiBase.IsBidiControlChar(c)) {
+					dest.append(c);
+				}
+			} while (i < src.length());
+			return dest.toString();
+		}
+		default: {
+			StringBuffer dest = new StringBuffer(src.length());
+
+			/* remove Bidi control characters and do mirroring */
+			int i = 0;
+			int c;
+			do {
+				c = UTF16.charAt(src, i);
+				i += UTF16.getCharCount(c);
+				if (!BidiBase.IsBidiControlChar(c)) {
+					UTF16.append(dest, UCharacter.getMirror(c));
+				}
+			} while (i < src.length());
+			return dest.toString();
+		}
+		} /* end of switch */
+	}
+
+	/*
+	 * Convenience overload: writes the code units text[start..limit) forward.
+	 */
+	private static String doWriteForward(char[] text, int start, int limit, int options) {
+		return doWriteForward(new String(text, start, limit - start), options);
+	}
+
+	/**
+	 * Returns src with its code points in reverse order, honoring the
+	 * REMOVE_BIDI_CONTROLS, DO_MIRRORING and KEEP_BASE_COMBINING bits of options.
+	 * An empty src yields an empty string.
+	 *
+	 * @param src     text of the run to reverse
+	 * @param options bit set of BidiBase writing options
+	 * @return the reversed text
+	 */
+	static String writeReverse(String src, int options) {
+		/*
+		 * RTL run -
+		 *
+		 * RTL runs need to be copied to the destination in reverse order of code
+		 * points, not code units, to keep Unicode characters intact.
+		 *
+		 * The general strategy for this is to read the source text in backward order,
+		 * collect all code units for a code point (and optionally following combining
+		 * characters, see below), and copy all these code units in ascending order to
+		 * the destination for this run.
+		 *
+		 * Several options request whether combining characters should be kept after
+		 * their base characters, whether Bidi control characters should be removed, and
+		 * whether characters should be replaced by their mirror-image equivalent
+		 * Unicode characters.
+		 */
+
+		/* srcLength is the number of source code units not yet consumed */
+		int srcLength = src.length();
+
+		/*
+		 * Nothing to reverse for an empty run. The do-while loops below read the code
+		 * unit at srcLength - 1 before testing the length and would throw on empty
+		 * input.
+		 */
+		if (srcLength == 0) {
+			return "";
+		}
+
+		StringBuffer dest = new StringBuffer(srcLength);
+
+		/* optimize for several combinations of options */
+		switch (options & (BidiBase.REMOVE_BIDI_CONTROLS | BidiBase.DO_MIRRORING | BidiBase.KEEP_BASE_COMBINING)) {
+
+		case 0:
+			/*
+			 * With none of the "complicated" options set, the destination run will have the
+			 * same length as the source run, and there is no mirroring and no keeping
+			 * combining characters with their base characters.
+			 *
+			 * XXX: or dest = UTF16.reverse(new StringBuffer(src));
+			 */
+
+			/* preserve character integrity */
+			do {
+				/*
+				 * i is always after the last code unit known to need to be kept in this segment
+				 */
+				int i = srcLength;
+
+				/* collect code units for one base character */
+				srcLength -= UTF16.getCharCount(UTF16.charAt(src, srcLength - 1));
+
+				/* copy this base character */
+				dest.append(src.substring(srcLength, i));
+			} while (srcLength > 0);
+			break;
+
+		case BidiBase.KEEP_BASE_COMBINING:
+			/*
+			 * Here, too, the destination run will have the same length as the source run,
+			 * and there is no mirroring. We do need to keep combining characters with their
+			 * base characters.
+			 */
+
+			/* preserve character integrity */
+			do {
+				/*
+				 * i is always after the last code unit known to need to be kept in this segment
+				 */
+				int c;
+				int i = srcLength;
+
+				/*
+				 * collect code units and modifier letters for one base character
+				 */
+				do {
+					c = UTF16.charAt(src, srcLength - 1);
+					srcLength -= UTF16.getCharCount(c);
+				} while (srcLength > 0 && IsCombining(UCharacter.getType(c)));
+
+				/* copy this "user character" */
+				dest.append(src.substring(srcLength, i));
+			} while (srcLength > 0);
+			break;
+
+		default:
+			/*
+			 * With several "complicated" options set, this is the most general and the
+			 * slowest copying of an RTL run. We will do mirroring, remove Bidi controls,
+			 * and keep combining characters with their base characters as requested.
+			 */
+
+			/* preserve character integrity */
+			do {
+				/*
+				 * i is always after the last code unit known to need to be kept in this segment
+				 */
+				int i = srcLength;
+
+				/* collect code units for one base character */
+				int c = UTF16.charAt(src, srcLength - 1);
+				srcLength -= UTF16.getCharCount(c);
+				if ((options & BidiBase.KEEP_BASE_COMBINING) != 0) {
+					/* collect modifier letters for this base character */
+					while (srcLength > 0 && IsCombining(UCharacter.getType(c))) {
+						c = UTF16.charAt(src, srcLength - 1);
+						srcLength -= UTF16.getCharCount(c);
+					}
+				}
+
+				if ((options & BidiBase.REMOVE_BIDI_CONTROLS) != 0 && BidiBase.IsBidiControlChar(c)) {
+					/*
+					 * do not copy this Bidi control character; continue in a do-while jumps to the
+					 * loop condition, so the remaining text is still processed
+					 */
+					continue;
+				}
+
+				/* copy this "user character" */
+				int j = srcLength;
+				if ((options & BidiBase.DO_MIRRORING) != 0) {
+					/* mirror only the base character */
+					c = UCharacter.getMirror(c);
+					UTF16.append(dest, c);
+					j += UTF16.getCharCount(c);
+				}
+				dest.append(src.substring(j, i));
+			} while (srcLength > 0);
+			break;
+		} /* end of switch */
+
+		return dest.toString();
+	}
+
+	/*
+	 * Convenience overload: writes the code units text[start..limit) in reverse.
+	 */
+	static String doWriteReverse(char[] text, int start, int limit, int options) {
+		return writeReverse(new String(text, start, limit - start), options);
+	}
+
+	/**
+	 * Builds the reordered text of a Bidi object by walking its visual runs and
+	 * writing each run forward or reversed according to its level and the options.
+	 * The options are first adjusted from the reordering options and reordering
+	 * mode of the Bidi object.
+	 *
+	 * @param bidi    Bidi object whose text, runs and directional properties are
+	 *                read
+	 * @param options bit set of BidiBase writing options
+	 * @return the reordered text
+	 */
+	static String writeReordered(BidiBase bidi, int options) {
+		int run, runCount;
+		StringBuilder dest;
+		char[] text = bidi.text;
+		runCount = bidi.countRuns();
+
+		/*
+		 * Option "insert marks" implies BidiBase.INSERT_LRM_FOR_NUMERIC if the
+		 * reordering mode (checked below) is appropriate.
+		 */
+		if ((bidi.reorderingOptions & BidiBase.OPTION_INSERT_MARKS) != 0) {
+			options |= BidiBase.INSERT_LRM_FOR_NUMERIC;
+			options &= ~BidiBase.REMOVE_BIDI_CONTROLS;
+		}
+		/*
+		 * Option "remove controls" implies BidiBase.REMOVE_BIDI_CONTROLS and cancels
+		 * BidiBase.INSERT_LRM_FOR_NUMERIC.
+		 */
+		if ((bidi.reorderingOptions & BidiBase.OPTION_REMOVE_CONTROLS) != 0) {
+			options |= BidiBase.REMOVE_BIDI_CONTROLS;
+			options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
+		}
+		/*
+		 * If we do not perform the "inverse Bidi" algorithm, then we don't need to
+		 * insert any LRMs, and don't need to test for it.
+		 */
+		if ((bidi.reorderingMode != BidiBase.REORDER_INVERSE_NUMBERS_AS_L)
+				&& (bidi.reorderingMode != BidiBase.REORDER_INVERSE_LIKE_DIRECT)
+				&& (bidi.reorderingMode != BidiBase.REORDER_INVERSE_FOR_NUMBERS_SPECIAL)
+				&& (bidi.reorderingMode != BidiBase.REORDER_RUNS_ONLY)) {
+			options &= ~BidiBase.INSERT_LRM_FOR_NUMERIC;
+		}
+		/* reserve twice the text length when marks may be inserted */
+		dest = new StringBuilder((options & BidiBase.INSERT_LRM_FOR_NUMERIC) != 0 ? bidi.length * 2 : bidi.length);
+		/*
+		 * Iterate through all visual runs and copy the run text segments to the
+		 * destination, according to the options.
+		 *
+		 * The tests for where to insert LRMs ignore the fact that there may be BN codes
+		 * or non-BMP code points at the beginning and end of a run; they may insert
+		 * LRMs unnecessarily but the tests are faster this way (this would have to be
+		 * improved for UTF-8).
+		 */
+		if ((options & BidiBase.OUTPUT_REVERSE) == 0) {
+			/* forward output */
+			if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
+				/* do not insert Bidi controls */
+				for (run = 0; run < runCount; ++run) {
+					BidiRun bidiRun = bidi.getVisualRun(run);
+					if (bidiRun.isEvenRun()) {
+						/* LTR run: copied forward, never mirrored */
+						dest.append(
+								doWriteForward(text, bidiRun.start, bidiRun.limit, options & ~BidiBase.DO_MIRRORING));
+					} else {
+						dest.append(doWriteReverse(text, bidiRun.start, bidiRun.limit, options));
+					}
+				}
+			} else {
+				/* insert Bidi controls for "inverse Bidi" */
+				byte[] dirProps = bidi.dirProps;
+				char uc;
+				int markFlag;
+
+				for (run = 0; run < runCount; ++run) {
+					BidiRun bidiRun = bidi.getVisualRun(run);
+					/* check if something relevant in insertPoints */
+					markFlag = bidi.runs[run].insertRemove;
+					if (markFlag < 0) { /* bidi controls count */
+						markFlag = 0;
+					}
+					if (bidiRun.isEvenRun()) {
+						if (bidi.isInverse() && dirProps[bidiRun.start] != BidiBase.L) {
+							markFlag |= BidiBase.LRM_BEFORE;
+						}
+						if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
+							uc = LRM_CHAR;
+						} else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
+							uc = RLM_CHAR;
+						} else {
+							uc = 0;
+						}
+						if (uc != 0) {
+							dest.append(uc);
+						}
+						dest.append(
+								doWriteForward(text, bidiRun.start, bidiRun.limit, options & ~BidiBase.DO_MIRRORING));
+
+						if (bidi.isInverse() && dirProps[bidiRun.limit - 1] != BidiBase.L) {
+							markFlag |= BidiBase.LRM_AFTER;
+						}
+						if ((markFlag & BidiBase.LRM_AFTER) != 0) {
+							uc = LRM_CHAR;
+						} else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
+							uc = RLM_CHAR;
+						} else {
+							uc = 0;
+						}
+						if (uc != 0) {
+							dest.append(uc);
+						}
+					} else { /* RTL run */
+						if (bidi.isInverse() && !bidi.testDirPropFlagAt(MASK_R_AL, bidiRun.limit - 1)) {
+							markFlag |= BidiBase.RLM_BEFORE;
+						}
+						if ((markFlag & BidiBase.LRM_BEFORE) != 0) {
+							uc = LRM_CHAR;
+						} else if ((markFlag & BidiBase.RLM_BEFORE) != 0) {
+							uc = RLM_CHAR;
+						} else {
+							uc = 0;
+						}
+						if (uc != 0) {
+							dest.append(uc);
+						}
+						dest.append(doWriteReverse(text, bidiRun.start, bidiRun.limit, options));
+
+						if (bidi.isInverse() && (MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
+							markFlag |= BidiBase.RLM_AFTER;
+						}
+						if ((markFlag & BidiBase.LRM_AFTER) != 0) {
+							uc = LRM_CHAR;
+						} else if ((markFlag & BidiBase.RLM_AFTER) != 0) {
+							uc = RLM_CHAR;
+						} else {
+							uc = 0;
+						}
+						if (uc != 0) {
+							dest.append(uc);
+						}
+					}
+				}
+			}
+		} else {
+			/* reverse output: runs are visited last to first */
+			if ((options & BidiBase.INSERT_LRM_FOR_NUMERIC) == 0) {
+				/* do not insert Bidi controls */
+				for (run = runCount; --run >= 0;) {
+					BidiRun bidiRun = bidi.getVisualRun(run);
+					if (bidiRun.isEvenRun()) {
+						dest.append(
+								doWriteReverse(text, bidiRun.start, bidiRun.limit, options & ~BidiBase.DO_MIRRORING));
+					} else {
+						dest.append(doWriteForward(text, bidiRun.start, bidiRun.limit, options));
+					}
+				}
+			} else {
+				/* insert Bidi controls for "inverse Bidi" */
+
+				byte[] dirProps = bidi.dirProps;
+
+				for (run = runCount; --run >= 0;) {
+					/* reverse output */
+					BidiRun bidiRun = bidi.getVisualRun(run);
+					if (bidiRun.isEvenRun()) {
+						if (dirProps[bidiRun.limit - 1] != BidiBase.L) {
+							dest.append(LRM_CHAR);
+						}
+
+						dest.append(
+								doWriteReverse(text, bidiRun.start, bidiRun.limit, options & ~BidiBase.DO_MIRRORING));
+
+						if (dirProps[bidiRun.start] != BidiBase.L) {
+							dest.append(LRM_CHAR);
+						}
+					} else {
+						if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.start])) == 0) {
+							dest.append(RLM_CHAR);
+						}
+
+						dest.append(doWriteForward(text, bidiRun.start, bidiRun.limit, options));
+
+						if ((MASK_R_AL & BidiBase.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) {
+							dest.append(RLM_CHAR);
+						}
+					}
+				}
+			}
+		}
+
+		return dest.toString();
+	}
+}
--- a/sources/main/java/jdk_internal/icu/text/FilteredNormalizer2.java
+++ b/sources/main/java/jdk_internal/icu/text/FilteredNormalizer2.java
@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+*******************************************************************************
+*   Copyright (C) 2009-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*/
+package jdk_internal.icu.text;
+
+import java.io.IOException;
+
+/**
+ * Normalization filtered by a UnicodeSet. Normalizes portions of the text
+ * contained in the filter set and leaves portions not contained in the filter
+ * set unchanged. Filtering is done via UnicodeSet.span(...,
+ * UnicodeSet.SpanCondition.SIMPLE). Not-in-the-filter text is treated as "is
+ * normalized" and "quick check yes". This class implements all of (and only)
+ * the Normalizer2 API. An instance of this class is unmodifiable/immutable.
+ * 
+ * @stable ICU 4.4
+ * @author Markus W. Scherer
+ */
+class FilteredNormalizer2 extends Normalizer2 {
+
+	/**
+	 * Constructs a filtered normalizer wrapping any Normalizer2 instance and a
+	 * filter set. Both are aliased and must not be modified or deleted while this
+	 * object is used. The filter set should be frozen; otherwise the performance
+	 * will suffer greatly.
+	 * 
+	 * @param n2        wrapped Normalizer2 instance
+	 * @param filterSet UnicodeSet which determines the characters to be normalized
+	 * @stable ICU 4.4
+	 */
+	public FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet) {
+		norm2 = n2;
+		set = filterSet;
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * <p>
+	 * The contents of dest are cleared first; only the portions of src inside the
+	 * filter set are passed to the wrapped normalizer.
+	 * 
+	 * @throws IllegalArgumentException if dest and src are the same object
+	 * @stable ICU 4.4
+	 */
+	@Override
+	public StringBuilder normalize(CharSequence src, StringBuilder dest) {
+		if (dest == src) {
+			throw new IllegalArgumentException();
+		}
+		dest.setLength(0);
+		normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
+		return dest;
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * <p>
+	 * The result is appended to dest; an IOException raised by dest is rethrown as
+	 * an InternalError.
+	 * 
+	 * @throws IllegalArgumentException if dest and src are the same object
+	 * @stable ICU 4.6
+	 */
+	@Override
+	public Appendable normalize(CharSequence src, Appendable dest) {
+		if (dest == src) {
+			throw new IllegalArgumentException();
+		}
+		return normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE);
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * 
+	 * @throws IllegalArgumentException if first and second are the same object
+	 * @stable ICU 4.4
+	 */
+	@Override
+	public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) {
+		return normalizeSecondAndAppend(first, second, true);
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * 
+	 * @throws IllegalArgumentException if first and second are the same object
+	 * @stable ICU 4.4
+	 */
+	@Override
+	public StringBuilder append(StringBuilder first, CharSequence second) {
+		return normalizeSecondAndAppend(first, second, false);
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * <p>
+	 * Code points outside the filter set are reported as having no decomposition
+	 * (null).
+	 * 
+	 * @stable ICU 4.6
+	 */
+	@Override
+	public String getDecomposition(int c) {
+		return set.contains(c) ? norm2.getDecomposition(c) : null;
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * <p>
+	 * Code points outside the filter set are reported with combining class 0.
+	 * 
+	 * @stable ICU 49
+	 */
+	@Override
+	public int getCombiningClass(int c) {
+		return set.contains(c) ? norm2.getCombiningClass(c) : 0;
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * <p>
+	 * The string is walked as alternating in-filter and not-in-filter spans; only
+	 * the in-filter spans are checked with the wrapped normalizer.
+	 * 
+	 * @stable ICU 4.4
+	 */
+	@Override
+	public boolean isNormalized(CharSequence s) {
+		UnicodeSet.SpanCondition spanCondition = UnicodeSet.SpanCondition.SIMPLE;
+		for (int prevSpanLimit = 0; prevSpanLimit < s.length();) {
+			int spanLimit = set.span(s, prevSpanLimit, spanCondition);
+			if (spanCondition == UnicodeSet.SpanCondition.NOT_CONTAINED) {
+				// Not-in-the-filter text counts as normalized; just switch span type.
+				spanCondition = UnicodeSet.SpanCondition.SIMPLE;
+			} else {
+				if (!norm2.isNormalized(s.subSequence(prevSpanLimit, spanLimit))) {
+					return false;
+				}
+				spanCondition = UnicodeSet.SpanCondition.NOT_CONTAINED;
+			}
+			prevSpanLimit = spanLimit;
+		}
+		return true;
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * <p>
+	 * The string is walked as alternating in-filter and not-in-filter spans; the
+	 * walk stops inside the first in-filter span that the wrapped normalizer does
+	 * not fully accept.
+	 * 
+	 * @stable ICU 4.4
+	 */
+	@Override
+	public int spanQuickCheckYes(CharSequence s) {
+		UnicodeSet.SpanCondition spanCondition = UnicodeSet.SpanCondition.SIMPLE;
+		for (int prevSpanLimit = 0; prevSpanLimit < s.length();) {
+			int spanLimit = set.span(s, prevSpanLimit, spanCondition);
+			if (spanCondition == UnicodeSet.SpanCondition.NOT_CONTAINED) {
+				// Not-in-the-filter text counts as "quick check yes".
+				spanCondition = UnicodeSet.SpanCondition.SIMPLE;
+			} else {
+				// The wrapped result is relative to the subsequence; rebase it onto s.
+				int yesLimit = prevSpanLimit + norm2.spanQuickCheckYes(s.subSequence(prevSpanLimit, spanLimit));
+				if (yesLimit < spanLimit) {
+					return yesLimit;
+				}
+				spanCondition = UnicodeSet.SpanCondition.NOT_CONTAINED;
+			}
+			prevSpanLimit = spanLimit;
+		}
+		return s.length();
+	}
+
+	/**
+	 * {@inheritDoc}
+	 * <p>
+	 * A code point outside the filter set always has a boundary before it.
+	 * 
+	 * @stable ICU 4.4
+	 */
+	@Override
+	public boolean hasBoundaryBefore(int c) {
+		return !set.contains(c) || norm2.hasBoundaryBefore(c);
+	}
+
+	// Internal: No argument checking, and appends to dest.
+	// Pass as input spanCondition the one that is likely to yield a non-zero
+	// span length at the start of src.
+	// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
+	// UnicodeSet.SpanCondition.SIMPLE should be passed in for the start of src
+	// and UnicodeSet.SpanCondition.NOT_CONTAINED should be passed in if we continue
+	// after an in-filter prefix.
+	// An IOException raised by dest is rethrown as an InternalError.
+	private Appendable normalize(CharSequence src, Appendable dest, UnicodeSet.SpanCondition spanCondition) {
+		// Don't throw away destination buffer between iterations: the wrapped
+		// normalize() replaces the contents of tempDest on every call.
+		StringBuilder tempDest = new StringBuilder();
+		try {
+			for (int prevSpanLimit = 0; prevSpanLimit < src.length();) {
+				int spanLimit = set.span(src, prevSpanLimit, spanCondition);
+				int spanLength = spanLimit - prevSpanLimit;
+				if (spanCondition == UnicodeSet.SpanCondition.NOT_CONTAINED) {
+					// Not-in-the-filter text is copied through unchanged.
+					if (spanLength != 0) {
+						dest.append(src, prevSpanLimit, spanLimit);
+					}
+					spanCondition = UnicodeSet.SpanCondition.SIMPLE;
+				} else {
+					if (spanLength != 0) {
+						// Not norm2.normalizeSecondAndAppend() because we do not want
+						// to modify the non-filter part of dest.
+						dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest));
+					}
+					spanCondition = UnicodeSet.SpanCondition.NOT_CONTAINED;
+				}
+				prevSpanLimit = spanLimit;
+			}
+		} catch (IOException e) {
+			throw new InternalError(e.toString(), e);
+		}
+		return dest;
+	}
+
+	// Shared implementation of normalizeSecondAndAppend() (doNormalize == true)
+	// and append() (doNormalize == false).
+	// Throws IllegalArgumentException if first and second are the same object.
+	private StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second, boolean doNormalize) {
+		if (first == second) {
+			throw new IllegalArgumentException();
+		}
+		if (first.length() == 0) {
+			// Nothing to merge with: plain normalize or plain copy.
+			if (doNormalize) {
+				return normalize(second, first);
+			} else {
+				return first.append(second);
+			}
+		}
+		// merge the in-filter suffix of the first string with the in-filter prefix of
+		// the second
+		int prefixLimit = set.span(second, 0, UnicodeSet.SpanCondition.SIMPLE);
+		if (prefixLimit != 0) {
+			CharSequence prefix = second.subSequence(0, prefixLimit);
+			int suffixStart = set.spanBack(first, 0x7fffffff, UnicodeSet.SpanCondition.SIMPLE);
+			if (suffixStart == 0) {
+				// The backward span reached the start of first, so the wrapped
+				// normalizer may work on first directly.
+				if (doNormalize) {
+					norm2.normalizeSecondAndAppend(first, prefix);
+				} else {
+					norm2.append(first, prefix);
+				}
+			} else {
+				// Merge in a scratch buffer so that the part of first before
+				// suffixStart is left untouched.
+				StringBuilder middle = new StringBuilder(first.subSequence(suffixStart, first.length()));
+				if (doNormalize) {
+					norm2.normalizeSecondAndAppend(middle, prefix);
+				} else {
+					norm2.append(middle, prefix);
+				}
+				first.delete(suffixStart, 0x7fffffff).append(middle);
+			}
+		}
+		if (prefixLimit < second.length()) {
+			// The remainder of second starts with not-in-the-filter text.
+			CharSequence rest = second.subSequence(prefixLimit, second.length());
+			if (doNormalize) {
+				normalize(rest, first, UnicodeSet.SpanCondition.NOT_CONTAINED);
+			} else {
+				first.append(rest);
+			}
+		}
+		return first;
+	}
+
+	// Both members are assigned once in the constructor and never reassigned;
+	// final makes the documented immutability of this class explicit.
+	private final Normalizer2 norm2;
+	private final UnicodeSet set;
+}
--- a/sources/main/java/jdk_internal/icu/text/Normalizer2.java
+++ b/sources/main/java/jdk_internal/icu/text/Normalizer2.java
@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ *   Copyright (C) 2009-2014, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+
+package jdk_internal.icu.text;
+
+import jdk_internal.icu.impl.Norm2AllModes;
+
+/**
+ * Unicode normalization functionality for standard Unicode normalization or for
+ * using custom mapping tables. All instances of this class are
+ * unmodifiable/immutable. The Normalizer2 class is not intended for public
+ * subclassing.
+ * <p>
+ * The primary functions are to produce a normalized string and to detect
+ * whether a string is already normalized. The most commonly used normalization
+ * forms are those defined in
+ * <a href="http://www.unicode.org/reports/tr15/">Unicode Standard Annex #15:
+ * Unicode Normalization Forms</a>. However, this API supports additional
+ * normalization forms for specialized purposes. For example, NFKC_Casefold is
+ * provided via getInstance("nfkc_cf", COMPOSE) and can be used in
+ * implementations of UTS #46.
+ * <p>
+ * Not only are the standard compose and decompose modes supplied, but
+ * additional modes are provided as documented in the Mode enum.
+ * <p>
+ * Some of the functions in this class identify normalization boundaries. At a
+ * normalization boundary, the portions of the string before it and starting
+ * from it do not interact and can be handled independently.
+ * <p>
+ * The spanQuickCheckYes() stops at a normalization boundary. When the goal is a
+ * normalized string, then the text before the boundary can be copied, and the
+ * remainder can be processed with normalizeSecondAndAppend().
+ * <p>
+ * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test
+ * whether a character is guaranteed to be at a normalization boundary,
+ * regardless of context. This is used for moving from one normalization
+ * boundary to the next or preceding boundary, and for performing iterative
+ * normalization.
+ * <p>
+ * Iterative normalization is useful when only a small portion of a longer
+ * string needs to be processed. For example, in ICU, iterative normalization is
+ * used by the NormalizationTransliterator (to avoid replacing
+ * already-normalized text) and ucol_nextSortKeyPart() (to process only the
+ * substring for which sort key bytes are computed).
+ * <p>
+ * The set of normalization boundaries returned by these functions may not be
+ * complete: There may be more boundaries that could be returned. Different
+ * functions may return different boundaries.
+ * 
+ * @stable ICU 4.4
+ * @author Markus W. Scherer
+ */
+public abstract class Normalizer2 {
+
+	/**
+	 * Returns the composing normalizer of the NFC data set, i.e. a Normalizer2
+	 * instance for Unicode NFC normalization. Same as getInstance(null, "nfc",
+	 * Mode.COMPOSE). Returns an unmodifiable singleton instance.
+	 * 
+	 * @return the requested Normalizer2, if successful
+	 * @stable ICU 49
+	 */
+	public static Normalizer2 getNFCInstance() {
+		return Norm2AllModes.getNFCInstance().comp;
+	}
+
+	/**
+	 * Returns the decomposing normalizer of the NFC data set, i.e. a Normalizer2
+	 * instance for Unicode NFD normalization. Same as getInstance(null, "nfc",
+	 * Mode.DECOMPOSE). Returns an unmodifiable singleton instance.
+	 * 
+	 * @return the requested Normalizer2, if successful
+	 * @stable ICU 49
+	 */
+	public static Normalizer2 getNFDInstance() {
+		return Norm2AllModes.getNFCInstance().decomp;
+	}
+
+	/**
+	 * Returns the composing normalizer of the NFKC data set, i.e. a Normalizer2
+	 * instance for Unicode NFKC normalization. Same as getInstance(null, "nfkc",
+	 * Mode.COMPOSE). Returns an unmodifiable singleton instance.
+	 * 
+	 * @return the requested Normalizer2, if successful
+	 * @stable ICU 49
+	 */
+	public static Normalizer2 getNFKCInstance() {
+		return Norm2AllModes.getNFKCInstance().comp;
+	}
+
+	/**
+	 * Returns the decomposing normalizer of the NFKC data set, i.e. a Normalizer2
+	 * instance for Unicode NFKD normalization. Same as getInstance(null, "nfkc",
+	 * Mode.DECOMPOSE). Returns an unmodifiable singleton instance.
+	 * 
+	 * @return the requested Normalizer2, if successful
+	 * @stable ICU 49
+	 */
+	public static Normalizer2 getNFKDInstance() {
+		return Norm2AllModes.getNFKCInstance().decomp;
+	}
+
+	/**
+	 * Returns the normalized form of the source string. When src is a String that
+	 * already passes the quick check completely, src itself is returned and no new
+	 * String is built.
+	 * 
+	 * @param src source string
+	 * @return normalized src
+	 * @stable ICU 4.4
+	 */
+	public String normalize(CharSequence src) {
+		final int srcLength = src.length();
+		if (src instanceof String) {
+			final int yesEnd = spanQuickCheckYes(src);
+			if (yesEnd == srcLength) {
+				// Fastpath: the whole String is already normalized.
+				return (String) src;
+			}
+			if (yesEnd != 0) {
+				// Copy the already-normalized head, then normalize only the tail.
+				final StringBuilder head = new StringBuilder(srcLength);
+				head.append(src, 0, yesEnd);
+				final CharSequence tail = src.subSequence(yesEnd, srcLength);
+				return normalizeSecondAndAppend(head, tail).toString();
+			}
+		}
+		final StringBuilder out = new StringBuilder(srcLength);
+		return normalize(src, out).toString();
+	}
+
+	/**
+	 * Writes the normalized form of the source string to the destination string
+	 * (replacing its contents) and returns the destination string. The source and
+	 * destination strings must be different objects.
+	 * 
+	 * @param src  source string
+	 * @param dest destination string; its contents is replaced with normalized src
+	 * @return dest
+	 * @stable ICU 4.4
+	 */
+	public abstract StringBuilder normalize(CharSequence src, StringBuilder dest);
+
+	/**
+	 * Writes the normalized form of the source string to the destination Appendable
+	 * and returns the destination Appendable. The source and destination strings
+	 * must be different objects.
+	 *
+	 * <p>
+	 * Implementations are expected to wrap any {@link java.io.IOException} raised
+	 * by the destination in an unchecked exception.
+	 *
+	 * @param src  source string
+	 * @param dest destination Appendable; gets normalized src appended
+	 * @return dest
+	 * @stable ICU 4.6
+	 */
+	public abstract Appendable normalize(CharSequence src, Appendable dest);
+
+	/**
+	 * Appends the normalized form of the second string to the first string (merging
+	 * them at the boundary) and returns the first string. The result is normalized
+	 * if the first string was normalized. The first and second strings must be
+	 * different objects.
+	 * 
+	 * @param first  string, should be normalized
+	 * @param second string, will be normalized
+	 * @return first
+	 * @stable ICU 4.4
+	 */
+	public abstract StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second);
+
+	/**
+	 * Appends the second string to the first string (merging them at the boundary)
+	 * and returns the first string. The result is normalized if both the strings
+	 * were normalized. The first and second strings must be different objects.
+	 * 
+	 * @param first  string, should be normalized
+	 * @param second string, should be normalized
+	 * @return first
+	 * @stable ICU 4.4
+	 */
+	public abstract StringBuilder append(StringBuilder first, CharSequence second);
+
+	/**
+	 * Gets the decomposition mapping of c. Roughly equivalent to normalizing the
+	 * String form of c on a DECOMPOSE Normalizer2 instance, but much faster, and
+	 * except that this function returns null if c does not have a decomposition
+	 * mapping in this instance's data. This function is independent of the mode of
+	 * the Normalizer2.
+	 * 
+	 * @param c code point
+	 * @return c's decomposition mapping, if any; otherwise null
+	 * @stable ICU 4.6
+	 */
+	public abstract String getDecomposition(int c);
+
+	/**
+	 * Gets the combining class of c. This default implementation always returns 0;
+	 * standard implementations are expected to override it and return the Unicode
+	 * Canonical_Combining_Class value.
+	 * 
+	 * @param c code point
+	 * @return c's combining class
+	 * @stable ICU 49
+	 */
+	public int getCombiningClass(int c) {
+		return 0;
+	}
+
+	/**
+	 * Tests if the string is normalized. Internally, in cases where the
+	 * quickCheck() method would return "maybe" (which is only possible for the two
+	 * COMPOSE modes) this method resolves to "yes" or "no" to provide a definitive
+	 * result, at the cost of doing more work in those cases.
+	 * 
+	 * @param s input string
+	 * @return true if s is normalized
+	 * @stable ICU 4.4
+	 */
+	public abstract boolean isNormalized(CharSequence s);
+
+	/**
+	 * Returns the end of the normalized substring of the input string. In other
+	 * words, with <code>end=spanQuickCheckYes(s);</code> the substring
+	 * <code>s.subSequence(0, end)</code> will pass the quick check with a "yes"
+	 * result.
+	 * <p>
+	 * The returned end index is usually one or more characters before the "no" or
+	 * "maybe" character: The end index is at a normalization boundary. (See the
+	 * class documentation for more about normalization boundaries.)
+	 * <p>
+	 * When the goal is a normalized string and most input strings are expected to
+	 * be normalized already, then call this method, and if it returns a prefix
+	 * shorter than the input string, copy that prefix and use
+	 * normalizeSecondAndAppend() for the remainder.
+	 * 
+	 * @param s input string
+	 * @return "yes" span end index
+	 * @stable ICU 4.4
+	 */
+	public abstract int spanQuickCheckYes(CharSequence s);
+
+	/**
+	 * Tests if the character always has a normalization boundary before it,
+	 * regardless of context. If true, then the character does not
+	 * normalization-interact with preceding characters. In other words, a string
+	 * containing this character can be normalized by processing portions before
+	 * this character and starting from this character independently. This is used
+	 * for iterative normalization. See the class documentation for details.
+	 * 
+	 * @param c character to test
+	 * @return true if c has a normalization boundary before it
+	 * @stable ICU 4.4
+	 */
+	public abstract boolean hasBoundaryBefore(int c);
+
+	/**
+	 * Sole constructor. (For invocation by subclass constructors, typically
+	 * implicit.)
+	 * 
+	 * @internal deprecated This API is ICU internal only.
+	 */
+	protected Normalizer2() {
+	}
+}
--- a/sources/main/java/jdk_internal/icu/text/NormalizerBase.java
+++ b/sources/main/java/jdk_internal/icu/text/NormalizerBase.java
@ -0,0 +1,791 @@
+/*
+ * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * Copyright (C) 2000-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+package jdk_internal.icu.text;
+
+import jdk_internal.bidi.CharacterIterator;
+import jdk_internal.bidi.Normalizer;
+import jdk_internal.icu.impl.Norm2AllModes;
+
+/**
+ * Unicode Normalization
+ *
+ * <h2>Unicode normalization API</h2>
+ *
+ * <code>normalize</code> transforms Unicode text into an equivalent composed or
+ * decomposed form, allowing for easier sorting and searching of text.
+ * <code>normalize</code> supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/reports/tr15/" target="unicode"> Unicode
+ * Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
+ *
+ * Characters with accents or other adornments can be encoded in several
+ * different ways in Unicode. For example, take the character A-acute. In
+ * Unicode, this can be encoded as a single character (the "composed" form):
+ *
+ * <pre>
+ *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
+ * </pre>
+ *
+ * or as two separate characters (the "decomposed" form):
+ *
+ * <pre>
+ *      0041    LATIN CAPITAL LETTER A
+ *      0301    COMBINING ACUTE ACCENT
+ * </pre>
+ *
+ * To a user of your program, however, both of these sequences should be treated
+ * as the same "user-level" character "A with acute accent". When you are
+ * searching or comparing text, you must ensure that these two sequences are
+ * treated equivalently. In addition, you must handle characters with more than
+ * one accent. Sometimes the order of a character's combining accents is
+ * significant, while in other cases accent sequences in different orders are
+ * really equivalent.
+ *
+ * Similarly, the string "ffi" can be encoded as three separate letters:
+ *
+ * <pre>
+ *      0066    LATIN SMALL LETTER F
+ *      0066    LATIN SMALL LETTER F
+ *      0069    LATIN SMALL LETTER I
+ * </pre>
+ *
+ * or as the single character
+ *
+ * <pre>
+ *      FB03    LATIN SMALL LIGATURE FFI
+ * </pre>
+ *
+ * The ffi ligature is not a distinct semantic character, and strictly speaking
+ * it shouldn't be in Unicode at all, but it was included for compatibility with
+ * existing character sets that already provided it. The Unicode standard
+ * identifies such characters by giving them "compatibility" decompositions into
+ * the corresponding semantic characters. When sorting and searching, you will
+ * often want to use these mappings.
+ *
+ * <code>normalize</code> helps solve these problems by transforming text into
+ * the canonical composed and decomposed forms as shown in the first example
+ * above. In addition, you can have it perform compatibility decompositions so
+ * that you can treat compatibility characters the same as their equivalents.
+ * Finally, <code>normalize</code> rearranges accents into the proper canonical
+ * order, so that you do not have to worry about accent rearrangement on your
+ * own.
+ *
+ * Form FCD, "Fast C or D", is also designed for collation. It makes it possible
+ * to work on strings that are not necessarily normalized, using an algorithm
+ * (as in collation) that works under "canonical closure", i.e., one that treats
+ * precomposed characters and their decomposed equivalents the same.
+ *
+ * It is not a normalization form because it does not provide for uniqueness of
+ * representation. Multiple strings may be canonically equivalent (their NFDs
+ * are identical) and may all conform to FCD without being identical themselves.
+ *
+ * The form is defined such that the "raw decomposition", the recursive
+ * canonical decomposition of each character, results in a string that is
+ * canonically ordered. This means that precomposed characters are allowed for
+ * as long as their decompositions do not need canonical reordering.
+ *
+ * Its advantage for a process like collation is that all NFD and most NFC texts
+ * - and many unnormalized texts - already conform to FCD and do not need to be
+ * normalized (NFD) for such a process. The FCD quick check will return YES for
+ * most strings in practice.
+ *
+ * normalize(FCD) may be implemented with NFD.
+ *
+ * For more details on FCD see Unicode Technical Note #5 (Canonical Equivalence
+ * in Applications): http://www.unicode.org/notes/tn5/#FCD
+ *
+ * ICU collation performs either NFD or FCD normalization automatically if
+ * normalization is turned on for the collator object. Beyond collation and
+ * string search, normalized strings may be useful for string equivalence
+ * comparisons, transliteration/transcription, unique representations, etc.
+ *
+ * The W3C generally recommends to exchange texts in NFC. Note also that most
+ * legacy character encodings use only precomposed forms and often do not encode
+ * any combining marks by themselves. For conversion to such character encodings
+ * the Unicode text needs to be normalized to NFC. For more usage examples, see
+ * the Unicode Standard Annex.
+ *
+ * Note: The Normalizer class also provides API for iterative normalization.
+ * While the setIndex() and getIndex() refer to indices in the underlying
+ * Unicode input text, the next() and previous() methods iterate through
+ * characters in the normalized output. This means that there is not necessarily
+ * a one-to-one correspondence between characters returned by next() and
+ * previous() and the indices passed to and returned from setIndex() and
+ * getIndex(). It is for this reason that Normalizer does not implement the
+ * CharacterIterator interface.
+ *
+ * @stable ICU 2.8
+ */
+// Original filename in ICU4J: Normalizer.java
+public final class NormalizerBase implements Cloneable {
+
+	// The input text and our position in it
+	private UCharacterIterator text;
+	private Normalizer2 norm2; // normalizer obtained from mode.getNormalizer2(options)
+	private Mode mode; // currently selected normalization mode
+	private int options; // option bits handed to Mode.getNormalizer2, e.g. UNICODE_3_2
+
+	// The normalization buffer is the result of normalizing
+	// the source text in [currentIndex..nextIndex].
+	private int currentIndex; // input index where the buffered segment starts
+	private int nextIndex; // input index where the buffered segment ends
+
+	// A buffer holding the normalized segment, and the read position within it
+	private StringBuilder buffer;
+	private int bufferPos; // offset into buffer in chars (moved by Character.charCount)
+
+	/*
+	 * Helper classes to defer loading of normalization data. ModeImpl is a plain
+	 * wrapper around a Normalizer2; each holder class below creates its ModeImpl in
+	 * a static field initializer, so the normalizer is only requested when that
+	 * holder class is initialized.
+	 */
+	private static final class ModeImpl {
+		private ModeImpl(Normalizer2 n2) {
+			normalizer2 = n2;
+		}
+
+		private final Normalizer2 normalizer2; // the wrapped normalizer
+	}
+
+	private static final class NFDModeImpl { // holder for the NFD normalizer
+		private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFDInstance());
+	}
+
+	private static final class NFKDModeImpl { // holder for the NFKD normalizer
+		private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKDInstance());
+	}
+
+	private static final class NFCModeImpl { // holder for the NFC normalizer
+		private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFCInstance());
+	}
+
+	private static final class NFKCModeImpl { // holder for the NFKC normalizer
+		private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKCInstance());
+	}
+
+	private static final class Unicode32 { // holder for the filter set used by the *32 variants
+		private static final UnicodeSet INSTANCE = new UnicodeSet("[:age=3.2:]").freeze();
+	}
+
+	private static final class NFD32ModeImpl { // NFD filtered through Unicode32.INSTANCE
+		private static final ModeImpl INSTANCE = new ModeImpl(
+				new FilteredNormalizer2(Normalizer2.getNFDInstance(), Unicode32.INSTANCE));
+	}
+
+	private static final class NFKD32ModeImpl { // NFKD filtered through Unicode32.INSTANCE
+		private static final ModeImpl INSTANCE = new ModeImpl(
+				new FilteredNormalizer2(Normalizer2.getNFKDInstance(), Unicode32.INSTANCE));
+	}
+
+	private static final class NFC32ModeImpl { // NFC filtered through Unicode32.INSTANCE
+		private static final ModeImpl INSTANCE = new ModeImpl(
+				new FilteredNormalizer2(Normalizer2.getNFCInstance(), Unicode32.INSTANCE));
+	}
+
+	private static final class NFKC32ModeImpl { // NFKC filtered through Unicode32.INSTANCE
+		private static final ModeImpl INSTANCE = new ModeImpl(
+				new FilteredNormalizer2(Normalizer2.getNFKCInstance(), Unicode32.INSTANCE));
+	}
+
+	/**
+	 * Options bit set value to select Unicode 3.2 normalization (except
+	 * NormalizationCorrections). At most one Unicode version can be selected at a
+	 * time.
+	 *
+	 * @stable ICU 2.6
+	 */
+	public static final int UNICODE_3_2 = 0x20;
+
+	public static final int UNICODE_3_2_0_ORIGINAL = UNICODE_3_2; // alias: same value as UNICODE_3_2
+
+	/*
+	 * Default option for the latest Unicode normalization. This option is provided
+	 * mainly for testing. The value zero means that normalization is done with the
+	 * fixes for:
+	 *   - Corrigendum 4 (Five CJK Canonical Mapping Errors)
+	 *   - Corrigendum 5 (Normalization Idempotency)
+	 */
+	public static final int UNICODE_LATEST = 0x00;
+
+	/**
+	 * Constant indicating that the end of the iteration has been reached. This is
+	 * guaranteed to have the same value as {@link UCharacterIterator#DONE}.
+	 *
+	 * @stable ICU 2.8
+	 */
+	public static final int DONE = UCharacterIterator.DONE;
+
+	/**
+	 * Constants for normalization modes.
+	 * <p>
+	 * The Mode class is not intended for public subclassing. Only the Mode
+	 * constants provided by the Normalizer class should be used, and any fields or
+	 * methods should not be called or overridden by users.
+	 *
+	 * @stable ICU 2.8
+	 */
+	public abstract static class Mode {
+
+		/**
+		 * Sole constructor.
+		 *
+		 * @internal
+		 * @deprecated This API is ICU internal only.
+		 */
+		@Deprecated
+		protected Mode() {
+		}
+
+		/**
+		 * Returns the {@code Normalizer2} that implements this mode for the given
+		 * option bits.
+		 *
+		 * @param options option bits, e.g. {@link NormalizerBase#UNICODE_3_2}
+		 * @return the normalizer to use for this mode
+		 * @internal
+		 * @deprecated This API is ICU internal only.
+		 */
+		@Deprecated
+		protected abstract Normalizer2 getNormalizer2(int options);
+	}
+
+	private static Mode toMode(Normalizer.Form form) {
+		switch (form) {
+		case NFC:
+			return NFC;
+		case NFD:
+			return NFD;
+		case NFKC:
+			return NFKC;
+		case NFKD:
+			return NFKD;
+		}
+
+		throw new IllegalArgumentException("Unexpected normalization form: " + form);
+	}
+
+	/** Mode whose normalizer is the shared no-op instance. */
+	private static final class NONEMode extends Mode {
+		/** Ignores {@code options} and always returns {@code NOOP_NORMALIZER2}. */
+		@Override
+		protected Normalizer2 getNormalizer2(int options) {
+			return Norm2AllModes.NOOP_NORMALIZER2;
+		}
+	}
+
+	/** NFD mode: picks the Unicode 3.2 filtered variant when that option bit is set. */
+	private static final class NFDMode extends Mode {
+		@Override
+		protected Normalizer2 getNormalizer2(int options) {
+			if ((options & UNICODE_3_2) == 0) {
+				return NFDModeImpl.INSTANCE.normalizer2;
+			}
+			return NFD32ModeImpl.INSTANCE.normalizer2;
+		}
+	}
+
+	/** NFKD mode: picks the Unicode 3.2 filtered variant when that option bit is set. */
+	private static final class NFKDMode extends Mode {
+		@Override
+		protected Normalizer2 getNormalizer2(int options) {
+			if ((options & UNICODE_3_2) == 0) {
+				return NFKDModeImpl.INSTANCE.normalizer2;
+			}
+			return NFKD32ModeImpl.INSTANCE.normalizer2;
+		}
+	}
+
+	/** NFC mode: picks the Unicode 3.2 filtered variant when that option bit is set. */
+	private static final class NFCMode extends Mode {
+		@Override
+		protected Normalizer2 getNormalizer2(int options) {
+			if ((options & UNICODE_3_2) == 0) {
+				return NFCModeImpl.INSTANCE.normalizer2;
+			}
+			return NFC32ModeImpl.INSTANCE.normalizer2;
+		}
+	}
+
+	/** NFKC mode: picks the Unicode 3.2 filtered variant when that option bit is set. */
+	private static final class NFKCMode extends Mode {
+		@Override
+		protected Normalizer2 getNormalizer2(int options) {
+			if ((options & UNICODE_3_2) == 0) {
+				return NFKCModeImpl.INSTANCE.normalizer2;
+			}
+			return NFKC32ModeImpl.INSTANCE.normalizer2;
+		}
+	}
+
+	/**
+	 * No decomposition/composition.
+	 *
+	 * @stable ICU 2.8
+	 */
+	public static final Mode NONE = new NONEMode();
+
+	/**
+	 * Canonical decomposition.
+	 *
+	 * @stable ICU 2.8
+	 */
+	public static final Mode NFD = new NFDMode();
+
+	/**
+	 * Compatibility decomposition.
+	 *
+	 * @stable ICU 2.8
+	 */
+	public static final Mode NFKD = new NFKDMode();
+
+	/**
+	 * Canonical decomposition followed by canonical composition.
+	 *
+	 * @stable ICU 2.8
+	 */
+	public static final Mode NFC = new NFCMode();
+
+	/**
+	 * Compatibility decomposition followed by canonical composition.
+	 */
+	public static final Mode NFKC = new NFKCMode();
+
+	// -------------------------------------------------------------------------
+	// Iterator constructors
+	// -------------------------------------------------------------------------
+
+	/**
+	 * Creates a new {@code NormalizerBase} object for iterating over the normalized
+	 * form of a given string.
+	 * <p>
+	 * The {@code options} parameter specifies which optional {@code NormalizerBase}
+	 * features are to be enabled for this object.
+	 * <p>
+	 * 
+	 * @param str  The string to be normalized. The normalization will start at the
+	 *             beginning of the string.
+	 *
+	 * @param mode The normalization mode.
+	 *
+	 * @param opt  Any optional features to be enabled. Currently the only available
+	 *             option is {@link #UNICODE_3_2}. If you want the default behavior
+	 *             corresponding to one of the standard Unicode Normalization Forms,
+	 *             use 0 for this argument.
+	 * @stable ICU 2.6
+	 */
+	public NormalizerBase(String str, Mode mode, int opt) {
+		this.text = UCharacterIterator.getInstance(str);
+		this.mode = mode;
+		this.options = opt;
+		norm2 = mode.getNormalizer2(opt);
+		buffer = new StringBuilder();
+	}
+
+	/**
+	 * Same as {@link #NormalizerBase(String, Mode, int)} with no options set.
+	 *
+	 * @param str  the string to be normalized
+	 * @param mode the normalization mode
+	 */
+	public NormalizerBase(String str, Mode mode) {
+		this(str, mode, UNICODE_LATEST);
+	}
+
+	/**
+	 * Creates a new {@code NormalizerBase} object for iterating over the normalized
+	 * form of the given text.
+	 * <p>
+	 * 
+	 * @param iter The input text to be normalized. The normalization will start at
+	 *             the beginning of the string.
+	 *
+	 * @param mode The normalization mode.
+	 *
+	 * @param opt  Any optional features to be enabled. Currently the only available
+	 *             option is {@link #UNICODE_3_2}. If you want the default behavior
+	 *             corresponding to one of the standard Unicode Normalization Forms,
+	 *             use 0 for this argument.
+	 * @stable ICU 2.6
+	 */
+	public NormalizerBase(CharacterIterator iter, Mode mode, int opt) {
+		this.text = UCharacterIterator.getInstance((CharacterIterator) iter.clone());
+		this.mode = mode;
+		this.options = opt;
+		norm2 = mode.getNormalizer2(opt);
+		buffer = new StringBuilder();
+	}
+
+	/**
+	 * Same as {@link #NormalizerBase(CharacterIterator, Mode, int)} with no
+	 * options set.
+	 *
+	 * @param iter the input text to be normalized
+	 * @param mode the normalization mode
+	 */
+	public NormalizerBase(CharacterIterator iter, Mode mode) {
+		this(iter, mode, UNICODE_LATEST);
+	}
+
+	/**
+	 * Clones this {@code NormalizerBase} object. All properties of this object are
+	 * duplicated in the new object, including the cloning of any
+	 * {@link CharacterIterator} that was passed in to the constructor or to
+	 * {@link #setText(CharacterIterator) setText}. However, the text storage
+	 * underlying the {@code CharacterIterator} is not duplicated unless the
+	 * iterator's {@code clone} method does so.
+	 *
+	 * @return an independent copy of this object
+	 * @stable ICU 2.8
+	 */
+	@Override
+	public Object clone() {
+		try {
+			final NormalizerBase duplicate = (NormalizerBase) super.clone();
+			// The field-by-field copy made by super.clone() already carries over
+			// mode, options, norm2, bufferPos, currentIndex and nextIndex; only the
+			// two mutable members need copies of their own.
+			duplicate.text = (UCharacterIterator) text.clone();
+			duplicate.buffer = new StringBuilder(buffer);
+			return duplicate;
+		} catch (CloneNotSupportedException cause) {
+			throw new InternalError(cause.toString(), cause);
+		}
+	}
+
+	/**
+	 * Normalizes a {@code String} using the given normalization operation.
+	 * <p>
+	 * The {@code options} parameter selects optional {@code NormalizerBase}
+	 * features for this operation. Currently the only available option is
+	 * {@link #UNICODE_3_2}; pass 0 for the default behavior of the standard
+	 * Unicode Normalization Forms.
+	 *
+	 * @param str     the input string to be normalized
+	 * @param mode    the normalization mode
+	 * @param options the optional features to be enabled
+	 * @return the normalized string
+	 * @stable ICU 2.6
+	 */
+	public static String normalize(String str, Mode mode, int options) {
+		final Normalizer2 normalizer = mode.getNormalizer2(options);
+		return normalizer.normalize(str);
+	}
+
+	/**
+	 * Normalizes {@code str} to the given form using {@link #UNICODE_LATEST}.
+	 *
+	 * @param str  the input string to be normalized
+	 * @param form the normalization form
+	 * @return the normalized string
+	 */
+	public static String normalize(String str, Normalizer.Form form) {
+		final Mode selected = toMode(form);
+		return normalize(str, selected, UNICODE_LATEST);
+	}
+
+	/**
+	 * Normalizes {@code str} to the given form with the given option bits.
+	 *
+	 * @param str     the input string to be normalized
+	 * @param form    the normalization form
+	 * @param options the optional features to be enabled
+	 * @return the normalized string
+	 */
+	public static String normalize(String str, Normalizer.Form form, int options) {
+		final Mode selected = toMode(form);
+		return normalize(str, selected, options);
+	}
+
+	/**
+	 * Tests whether a string is in a given normalization form. This is
+	 * semantically equivalent to {@code source.equals(normalize(source, mode))}.
+	 * <p>
+	 * Unlike quickCheck(), this function returns a definitive result, never a
+	 * "maybe". For NFD, NFKD, and FCD, both functions work exactly the same. For
+	 * NFC and NFKC, where quickCheck may return "maybe", this function performs
+	 * further tests to arrive at a true/false result.
+	 *
+	 * @param str     the input string to be checked
+	 * @param mode    the normalization mode
+	 * @param options options for use with exclusion set and tailored
+	 *                normalization. The only option currently recognized is
+	 *                {@link #UNICODE_3_2}.
+	 * @return the result of the selected normalizer's {@code isNormalized} check
+	 * @stable ICU 2.6
+	 */
+	public static boolean isNormalized(String str, Mode mode, int options) {
+		final Normalizer2 normalizer = mode.getNormalizer2(options);
+		return normalizer.isNormalized(str);
+	}
+
+	/**
+	 * Tests whether {@code str} is in the given form, using
+	 * {@link #UNICODE_LATEST}.
+	 *
+	 * @param str  the input string to be checked
+	 * @param form the normalization form
+	 * @return the result of the check for that form
+	 */
+	public static boolean isNormalized(String str, Normalizer.Form form) {
+		final Mode selected = toMode(form);
+		return isNormalized(str, selected, UNICODE_LATEST);
+	}
+
+	/**
+	 * Tests whether {@code str} is in the given form with the given option bits.
+	 *
+	 * @param str     the input string to be checked
+	 * @param form    the normalization form
+	 * @param options the optional features to be enabled
+	 * @return the result of the check for that form
+	 */
+	public static boolean isNormalized(String str, Normalizer.Form form, int options) {
+		final Mode selected = toMode(form);
+		return isNormalized(str, selected, options);
+	}
+
+	// -------------------------------------------------------------------------
+	// Iteration API
+	// -------------------------------------------------------------------------
+
+	/**
+	 * Returns the current character in the normalized text without moving the
+	 * iteration position. If the buffer is exhausted, the next segment is
+	 * normalized first.
+	 *
+	 * @return the code point as an int, or {@link #DONE} if there is none
+	 * @stable ICU 2.8
+	 */
+	public int current() {
+		// Only normalize another segment when nothing is left in the buffer.
+		final boolean available = bufferPos < buffer.length() || nextNormalize();
+		if (!available) {
+			return DONE;
+		}
+		return buffer.codePointAt(bufferPos);
+	}
+
+	/**
+	 * Returns the next character in the normalized text and advances the
+	 * iteration position past it. If the end of the text has already been
+	 * reached, {@link #DONE} is returned.
+	 *
+	 * @return the code point as an int
+	 * @stable ICU 2.8
+	 */
+	public int next() {
+		// Only normalize another segment when nothing is left in the buffer.
+		final boolean available = bufferPos < buffer.length() || nextNormalize();
+		if (!available) {
+			return DONE;
+		}
+		final int codePoint = buffer.codePointAt(bufferPos);
+		// Step over one or two chars, depending on the code point.
+		bufferPos += Character.charCount(codePoint);
+		return codePoint;
+	}
+
+	/**
+	 * Returns the previous character in the normalized text and moves the
+	 * iteration position back before it. If the beginning of the text has already
+	 * been reached, {@link #DONE} is returned.
+	 *
+	 * @return the code point as an int
+	 * @stable ICU 2.8
+	 */
+	public int previous() {
+		// Only normalize the preceding segment when at the start of the buffer.
+		final boolean available = bufferPos > 0 || previousNormalize();
+		if (!available) {
+			return DONE;
+		}
+		final int codePoint = buffer.codePointBefore(bufferPos);
+		// Step back over one or two chars, depending on the code point.
+		bufferPos -= Character.charCount(codePoint);
+		return codePoint;
+	}
+
+	/**
+	 * Resets the index to the beginning of the text and discards any buffered
+	 * output. This is equivalent to {@code setIndexOnly(0)}.
+	 *
+	 * @stable ICU 2.8
+	 */
+	public void reset() {
+		// setIndexOnly repositions the text, sets both indices and clears the buffer.
+		setIndexOnly(0);
+	}
+
+	/**
+	 * Sets the iteration position in the input text that is being normalized,
+	 * without any immediate normalization. After {@code setIndexOnly()},
+	 * {@link #getIndex} returns the index specified here.
+	 *
+	 * @param index the desired index in the input text
+	 * @stable ICU 2.8
+	 */
+	public void setIndexOnly(int index) {
+		// Runs first so that the index is validated before any state is touched.
+		text.setIndex(index);
+		nextIndex = index;
+		currentIndex = index;
+		clearBuffer();
+	}
+
+	/**
+	 * Set the iteration position in the input text that is being normalized and
+	 * return the first normalized character at that position.
+	 * <p>
+	 * <b>Note:</b> This method sets the position in the <em>input</em> text, while
+	 * {@link #next} and {@link #previous} iterate through characters in the
+	 * normalized <em>output</em>. This means that there is not necessarily a
+	 * one-to-one correspondence between characters returned by {@code next} and
+	 * {@code previous} and the indices passed to and returned from {@code setIndex}
+	 * and {@link #getIndex}.
+	 * <p>
+	 * Implemented as {@link #setIndexOnly} followed by {@link #current}.
+	 *
+	 * @param index the desired index in the input text.
+	 *
+	 * @return the first normalized character that is the result of iterating
+	 *         forward starting at the given index.
+	 *
+	 * @throws IllegalArgumentException if the given index is less than
+	 *                                  {@link #getBeginIndex} or greater than
+	 *                                  {@link #getEndIndex}.
+	 * @obsolete ICU 3.2 (listed as deprecated as of ICU 3.2)
+	 */
+	public int setIndex(int index) {
+		setIndexOnly(index);
+		return current();
+	}
+
+	/**
+	 * Retrieve the index of the start of the input text. This is the begin index of
+	 * the {@code CharacterIterator} or the start (i.e. 0) of the {@code String}
+	 * over which this {@code NormalizerBase} is iterating. This implementation
+	 * always returns 0.
+	 *
+	 * @deprecated ICU 2.2. Use startIndex() instead. (Note: this class does not
+	 *             define a {@code startIndex()} method.)
+	 * @return the start index of the input text
+	 */
+	@Deprecated
+	public int getBeginIndex() {
+		return 0;
+	}
+
+	/**
+	 * Retrieve the index of the end of the input text. This is the end index of the
+	 * {@code CharacterIterator} or the length of the {@code String} over which this
+	 * {@code NormalizerBase} is iterating. Delegates to {@link #endIndex}.
+	 *
+	 * @deprecated ICU 2.2. Use endIndex() instead.
+	 * @return the end index of the input text
+	 * @see #endIndex
+	 */
+	@Deprecated
+	public int getEndIndex() {
+		return endIndex();
+	}
+
+	/**
+	 * Retrieves the current iteration position in the input text that is being
+	 * normalized. This method is useful in applications such as searching, where
+	 * you need to be able to determine the position in the input text that
+	 * corresponds to a given normalized output character.
+	 * <p>
+	 * <b>Note:</b> This method reports a position in the <em>input</em>, while
+	 * {@link #next} and {@link #previous} iterate through characters in the
+	 * <em>output</em>. This means that there is not necessarily a one-to-one
+	 * correspondence between characters returned by {@code next} and
+	 * {@code previous} and the indices passed to and returned from {@code setIndex}
+	 * and {@code getIndex}.
+	 *
+	 * @return the current iteration position
+	 * @stable ICU 2.8
+	 */
+	public int getIndex() {
+		// While buffered output remains unread, report the start of its source
+		// segment; once it is consumed, report the end of that segment.
+		return bufferPos < buffer.length() ? currentIndex : nextIndex;
+	}
+
+	/**
+	 * Retrieve the index of the end of the input text. This is the end index of the
+	 * {@code CharacterIterator} or the length of the {@code String} over which this
+	 * {@code NormalizerBase} is iterating.
+	 *
+	 * @return the length reported by the underlying text iterator
+	 * @stable ICU 2.8
+	 */
+	public int endIndex() {
+		return text.getLength();
+	}
+
+	// -------------------------------------------------------------------------
+	// Iterator attributes
+	// -------------------------------------------------------------------------
+	/**
+	 * Set the normalization mode for this object. The normalizer is looked up
+	 * again from the new mode using the current options.
+	 * <p>
+	 * <b>Note:</b> If the normalization mode is changed while iterating over a
+	 * string, calls to {@link #next} and {@link #previous} may return previously
+	 * buffered characters in the old normalization mode until the iteration is
+	 * able to re-sync at the next base character. It is safest to call
+	 * {@link #setText setText()}, {@link #reset} or {@link #setIndexOnly}, all of
+	 * which clear the buffer, after calling {@code setMode}.
+	 * <p>
+	 *
+	 * @param newMode the new mode for this {@code NormalizerBase}. The supported
+	 *                modes are:
+	 *                <ul>
+	 *                <li>{@link #NFC} - Unicode canonical decomposition followed
+	 *                by canonical composition.
+	 *                <li>{@link #NFKC} - Unicode compatibility decomposition
+	 *                followed by canonical composition.
+	 *                <li>{@link #NFD} - Unicode canonical decomposition
+	 *                <li>{@link #NFKD} - Unicode compatibility decomposition.
+	 *                <li>{@link #NONE} - Do nothing but return characters from the
+	 *                underlying input text.
+	 *                </ul>
+	 *
+	 * @see #getMode
+	 * @stable ICU 2.8
+	 */
+	public void setMode(Mode newMode) {
+		mode = newMode;
+		norm2 = mode.getNormalizer2(options);
+	}
+
+	/**
+	 * Return the basic operation performed by this {@code NormalizerBase}.
+	 *
+	 * @return the mode given to the constructor or to the most recent
+	 *         {@link #setMode} call
+	 * @see #setMode
+	 * @stable ICU 2.8
+	 */
+	public Mode getMode() {
+		return mode;
+	}
+
+	/**
+	 * Replaces the input text over which this {@code NormalizerBase} iterates and
+	 * moves the iteration position to the beginning of the new text.
+	 *
+	 * @param newText the new string to be normalized
+	 * @throws IllegalStateException if no iterator could be created for
+	 *                               {@code newText}
+	 * @stable ICU 2.8
+	 */
+	public void setText(String newText) {
+		final UCharacterIterator replacement = UCharacterIterator.getInstance(newText);
+		if (replacement != null) {
+			text = replacement;
+			reset();
+			return;
+		}
+		throw new IllegalStateException("Could not create a new UCharacterIterator");
+	}
+
+	/**
+	 * Replaces the input text over which this {@code NormalizerBase} iterates and
+	 * moves the iteration position to the beginning of the new text. The iterator
+	 * is handed to {@code UCharacterIterator.getInstance} as is, without being
+	 * cloned here.
+	 *
+	 * @param newText the new text to be normalized
+	 * @throws IllegalStateException if no iterator could be created for
+	 *                               {@code newText}
+	 * @stable ICU 2.8
+	 */
+	public void setText(CharacterIterator newText) {
+		final UCharacterIterator replacement = UCharacterIterator.getInstance(newText);
+		if (replacement != null) {
+			text = replacement;
+			nextIndex = 0;
+			currentIndex = 0;
+			clearBuffer();
+			return;
+		}
+		throw new IllegalStateException("Could not create a new UCharacterIterator");
+	}
+
+	/** Empties the normalization buffer and rewinds the read position to 0. */
+	private void clearBuffer() {
+		bufferPos = 0;
+		buffer.setLength(0);
+	}
+
+	private boolean nextNormalize() {
+		clearBuffer();
+		currentIndex = nextIndex;
+		text.setIndex(nextIndex);
+		// Skip at least one character so we make progress.
+		int c = text.nextCodePoint();
+		if (c < 0) {
+			return false;
+		}
+		StringBuilder segment = new StringBuilder().appendCodePoint(c);
+		while ((c = text.nextCodePoint()) >= 0) {
+			if (norm2.hasBoundaryBefore(c)) {
+				text.moveCodePointIndex(-1);
+				break;
+			}
+			segment.appendCodePoint(c);
+		}
+		nextIndex = text.getIndex();
+		norm2.normalize(segment, buffer);
+		return buffer.length() != 0;
+	}
+
+	private boolean previousNormalize() {
+		clearBuffer();
+		nextIndex = currentIndex;
+		text.setIndex(currentIndex);
+		StringBuilder segment = new StringBuilder();
+		int c;
+		while ((c = text.previousCodePoint()) >= 0) {
+			if (c <= 0xffff) {
+				segment.insert(0, (char) c);
+			} else {
+				segment.insert(0, Character.toChars(c));
+			}
+			if (norm2.hasBoundaryBefore(c)) {
+				break;
+			}
+		}
+		currentIndex = text.getIndex();
+		norm2.normalize(segment, buffer);
+		bufferPos = buffer.length();
+		return buffer.length() != 0;
+	}
+
+}
--- a/sources/main/java/jdk_internal/icu/text/Replaceable.java
+++ b/sources/main/java/jdk_internal/icu/text/Replaceable.java
@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package jdk_internal.icu.text;
+
+/**
+ * <code>Replaceable</code> is an interface representing a string of characters
+ * that supports the replacement of a range of itself with a new string of
+ * characters. It is used by APIs that change a piece of text while retaining
+ * metadata. Metadata is data other than the Unicode characters returned by
+ * char32At(). One example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>
+ * An implicit aspect of the <code>Replaceable</code> API is that during a
+ * replace operation, new characters take on the metadata of the old characters.
+ * For example, if the string "the <b>bold</b> font" has range (4, 8) replaced
+ * with "strong", then it becomes "the <b>strong</b> font".
+ *
+ * <p>
+ * <code>Replaceable</code> specifies ranges using a start offset and a limit
+ * offset. The range of characters thus specified includes the characters at
+ * offset start..limit-1. That is, the start offset is inclusive, and the limit
+ * offset is exclusive.
+ *
+ * <p>
+ * <code>Replaceable</code> also includes API to access characters in the
+ * string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>
+ * For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ * <li>Set the metadata of the new text to the metadata of the first character
+ * replaced</li>
+ * <li>If no characters are replaced, use the metadata of the previous
+ * character</li>
+ * <li>If there is no previous character (i.e. start == 0), use the following
+ * character</li>
+ * <li>If there is no following character (i.e. the replaceable was empty), use
+ * default metadata</li>
+ * <li>If the code point U+FFFF is seen, it should be interpreted as a special
+ * marker having no metadata</li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ *
+ * <p>
+ * Copyright &copy; IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+public interface Replaceable {
+	/**
+	 * Returns the number of 16-bit code units in the text.
+	 *
+	 * @return number of 16-bit code units in text
+	 * @stable ICU 2.0
+	 */
+	int length();
+
+	/**
+	 * Returns the 16-bit code unit at the given offset into the text.
+	 *
+	 * @param offset an integer between 0 and <code>length()</code>-1 inclusive
+	 * @return 16-bit code unit of text at given offset
+	 * @stable ICU 2.0
+	 */
+	char charAt(int offset);
+
+	/**
+	 * Copies characters from this object into the destination character array. The
+	 * first character to be copied is at index <code>srcStart</code>; the last
+	 * character to be copied is at index <code>srcLimit-1</code> (thus the total
+	 * number of characters to be copied is <code>srcLimit-srcStart</code>). The
+	 * characters are copied into the subarray of <code>dst</code> starting at index
+	 * <code>dstStart</code> and ending at index
+	 * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+	 *
+	 * @param srcStart the beginning index to copy, inclusive;
+	 *                 {@code 0 <= srcStart <= srcLimit}.
+	 * @param srcLimit the ending index to copy, exclusive;
+	 *                 {@code srcStart <= srcLimit <= length()}.
+	 * @param dst      the destination array.
+	 * @param dstStart the start offset in the destination array.
+	 * @stable ICU 2.0
+	 */
+	void getChars(int srcStart, int srcLimit, char dst[], int dstStart);
+}
--- a/sources/main/java/jdk_internal/icu/text/ReplaceableString.java
+++ b/sources/main/java/jdk_internal/icu/text/ReplaceableString.java
@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2009, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+package jdk_internal.icu.text;
+
+/**
+ * <code>ReplaceableString</code> is an adapter class that implements the
+ * <code>Replaceable</code> API around an ordinary <code>StringBuffer</code>.
+ *
+ * <p>
+ * <em>Note:</em> This class does not support attributes and is not intended for
+ * general use. Most clients will need to implement {@link Replaceable} in their
+ * text representation class.
+ *
+ * <p>
+ * Copyright &copy; IBM Corporation 1999. All rights reserved.
+ *
+ * @see Replaceable
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+public class ReplaceableString implements Replaceable {
+
+	/**
+	 * Backing storage. It is assigned exactly once by a constructor; when the
+	 * <code>StringBuffer</code> constructor is used it is the caller's own buffer,
+	 * not a copy.
+	 */
+	private final StringBuffer buf;
+
+	/**
+	 * Construct a new object with the given initial contents. The contents are
+	 * copied into a freshly created <code>StringBuffer</code>.
+	 * 
+	 * @param str initial contents
+	 * @stable ICU 2.0
+	 */
+	public ReplaceableString(String str) {
+		buf = new StringBuffer(str);
+	}
+
+	/**
+	 * Construct a new object using <code>buf</code> for internal storage. The
+	 * contents of <code>buf</code> at the time of construction are used as the
+	 * initial contents. <em>Note! Modifications to <code>buf</code> will modify
+	 * this object, and vice versa.</em>
+	 * 
+	 * @param buf object to be used as internal storage
+	 * @stable ICU 2.0
+	 */
+	public ReplaceableString(StringBuffer buf) {
+		this.buf = buf;
+	}
+
+	/**
+	 * Return the number of characters contained in this object.
+	 * <code>Replaceable</code> API.
+	 * 
+	 * @return the current length of the backing buffer
+	 * @stable ICU 2.0
+	 */
+	@Override
+	public int length() {
+		return buf.length();
+	}
+
+	/**
+	 * Return the character at the given position in this object.
+	 * <code>Replaceable</code> API.
+	 * 
+	 * @param offset offset into the contents, from 0 to <code>length()</code> - 1
+	 * @return the character stored at <code>offset</code> in the backing buffer
+	 * @stable ICU 2.0
+	 */
+	@Override
+	public char charAt(int offset) {
+		return buf.charAt(offset);
+	}
+
+	/**
+	 * Copies characters from this object into the destination character array. The
+	 * first character to be copied is at index <code>srcStart</code>; the last
+	 * character to be copied is at index <code>srcLimit-1</code> (thus the total
+	 * number of characters to be copied is <code>srcLimit-srcStart</code>). The
+	 * characters are copied into the subarray of <code>dst</code> starting at index
+	 * <code>dstStart</code> and ending at index
+	 * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+	 *
+	 * @param srcStart the beginning index to copy, inclusive;
+	 *                 {@code 0 <= srcStart <= srcLimit}.
+	 * @param srcLimit the ending index to copy, exclusive;
+	 *                 {@code srcStart <= srcLimit <= length()}.
+	 * @param dst      the destination array.
+	 * @param dstStart the start offset in the destination array.
+	 * @stable ICU 2.0
+	 */
+	@Override
+	public void getChars(int srcStart, int srcLimit, char dst[], int dstStart) {
+		// An empty range copies nothing and never reaches the backing buffer.
+		if (srcStart != srcLimit) {
+			buf.getChars(srcStart, srcLimit, dst, dstStart);
+		}
+	}
+}
--- a/sources/main/java/jdk_internal/icu/text/StringPrep.java
+++ b/sources/main/java/jdk_internal/icu/text/StringPrep.java
@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+/*
+ *******************************************************************************
+ * Copyright (C) 2003-2004, International Business Machines Corporation and         *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+//
+// CHANGELOG
+//      2005-05-19 Edward Wang
+//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/StringPrep.java
+//          - move from package com.ibm.icu.text to package sun.net.idn
+//          - use ParseException instead of StringPrepParseException
+//          - change 'Normalizer.getUnicodeVersion()' to 'NormalizerImpl.getUnicodeVersion()'
+//          - remove all @deprecated tag to make compiler happy
+//      2007-08-14 Martin Buchholz
+//          - remove redundant casts
+//
+package jdk_internal.icu.text;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import jdk_internal.bidi.Normalizer;
+import jdk_internal.bidi.ParseException;
+import jdk_internal.bidi.SunNormalizer;
+import jdk_internal.icu.impl.CharTrie;
+import jdk_internal.icu.impl.StringPrepDataReader;
+import jdk_internal.icu.impl.Trie;
+import jdk_internal.icu.lang.UCharacter;
+import jdk_internal.icu.lang.UCharacterDirection;
+import jdk_internal.icu.util.VersionInfo;
+
+/**
+ * StringPrep API implements the StringPrep framework as described by
+ * <a href="http://www.ietf.org/rfc/rfc3454.txt">RFC 3454</a>. StringPrep
+ * prepares Unicode strings for use in network protocols. Profiles of StringPrep
+ * are sets of rules and data according to which the Unicode strings are
+ * prepared. Each profile contains tables which describe how a code point
+ * should be treated. The tables are broadly classified into
+ * <ul>
+ * <li>Unassigned Table: Contains code points that are unassigned in the Unicode
+ * Version supported by StringPrep. Currently RFC 3454 supports Unicode 3.2.
+ * </li>
+ * <li>Prohibited Table: Contains code points that are prohibited from the output
+ * of the StringPrep processing function.</li>
+ * <li>Mapping Table: Contains code points that are deleted from the output or
+ * case mapped.</li>
+ * </ul>
+ *
+ * The procedure for preparing Unicode strings:
+ * <ol>
+ * <li>Map: For each character in the input, check if it has a mapping and, if
+ * so, replace it with its mapping.</li>
+ * <li>Normalize: Possibly normalize the result of step 1 using Unicode
+ * normalization.</li>
+ * <li>Prohibit: Check for any characters that are not allowed in the output. If
+ * any are found, return an error.</li>
+ * <li>Check bidi: Possibly check for right-to-left characters, and if any are
+ * found, make sure that the whole string satisfies the requirements for
+ * bidirectional strings. If the string does not satisfy the requirements for
+ * bidirectional strings, return an error.</li>
+ * </ol>
+ * 
+ * @author Ram Viswanadha
+ * @draft ICU 2.8
+ */
+public final class StringPrep {
+	/**
+	 * Option to prohibit processing of unassigned code points in the input
+	 *
+	 * @see #prepare
+	 * @draft ICU 2.8
+	 */
+	public static final int DEFAULT = 0x0000;
+
+	/**
+	 * Option to allow processing of unassigned code points in the input
+	 *
+	 * @see #prepare
+	 * @draft ICU 2.8
+	 */
+	public static final int ALLOW_UNASSIGNED = 0x0001;
+
+	private static final int UNASSIGNED = 0x0000;
+	private static final int MAP = 0x0001;
+	private static final int PROHIBITED = 0x0002;
+	private static final int DELETE = 0x0003;
+	private static final int TYPE_LIMIT = 0x0004;
+
+	private static final int NORMALIZATION_ON = 0x0001;
+	private static final int CHECK_BIDI_ON = 0x0002;
+
+	private static final int TYPE_THRESHOLD = 0xFFF0;
+	private static final int MAX_INDEX_VALUE = 0x3FBF; /* 16139 */
+	private static final int MAX_INDEX_TOP_LENGTH = 0x0003;
+
+	/* indexes[] value names */
+	private static final int INDEX_TRIE_SIZE = 0; /* number of bytes in normalization trie */
+	private static final int INDEX_MAPPING_DATA_SIZE = 1; /* The array that contains the mapping */
+	private static final int NORM_CORRECTNS_LAST_UNI_VERSION = 2; /*
+																	 * The index of Unicode version of last entry in
+																	 * NormalizationCorrections.txt
+																	 */
+	private static final int ONE_UCHAR_MAPPING_INDEX_START = 3; /*
+																 * The starting index of 1 UChar mapping index in the
+																 * mapping data array
+																 */
+	private static final int TWO_UCHARS_MAPPING_INDEX_START = 4; /*
+																	 * The starting index of 2 UChars mapping index in
+																	 * the mapping data array
+																	 */
+	private static final int THREE_UCHARS_MAPPING_INDEX_START = 5;
+	private static final int FOUR_UCHARS_MAPPING_INDEX_START = 6;
+	private static final int OPTIONS = 7; /* Bit set of options to turn on in the profile */
+	private static final int INDEX_TOP = 16; /* changing this requires a new formatVersion */
+
+	/**
+	 * Default buffer size of datafile
+	 */
+	private static final int DATA_BUFFER_SIZE = 25000;
+
+	/* Wrappers for Trie implementations */
+	private static final class StringPrepTrieImpl implements Trie.DataManipulate {
+		// Trie holding the profile data; left unset (null) until it is assigned.
+		private CharTrie sprepTrie;
+
+		/**
+		 * <code>Trie.DataManipulate</code> callback that turns the data stored for a
+		 * lead surrogate into the index array offset of the indexes for that lead
+		 * surrogate. This implementation hands the value back untouched.
+		 * 
+		 * @param value data value for a surrogate from the trie, including the
+		 *              folding offset
+		 * @return <code>value</code>, unchanged
+		 */
+		public int getFoldingOffset(int value) {
+			return value;
+		}
+	}
+
+	// CharTrie implementation for reading the trie data
+	private StringPrepTrieImpl sprepTrieImpl;
+	// Indexes read from the data file
+	private int[] indexes;
+	// mapping data read from the data file
+	private char[] mappingData;
+	// format version of the data file
+	private byte[] formatVersion;
+	// the version of Unicode supported by the data file
+	private VersionInfo sprepUniVer;
+	// the Unicode version of last entry in the
+	// NormalizationCorrections.txt file if normalization
+	// is turned on
+	private VersionInfo normCorrVer;
+	// Option to turn on Normalization
+	private boolean doNFKC;
+	// Option to turn on checking for BiDi rules
+	private boolean checkBiDi;
+
+	/**
+	 * Looks up the 16-bit word that the profile trie stores for a code point.
+	 *
+	 * @param ch the code point to look up
+	 * @return the trie word for <code>ch</code>
+	 */
+	private char getCodePointValue(int ch) {
+		final CharTrie trie = sprepTrieImpl.sprepTrie;
+		return trie.getCodePointValue(ch);
+	}
+
+	/**
+	 * Unpacks a version number stored as four bytes in one int, most significant
+	 * byte first (major, minor, milli, micro).
+	 *
+	 * @param comp the packed version number
+	 * @return the corresponding VersionInfo
+	 */
+	private static VersionInfo getVersionInfo(int comp) {
+		return VersionInfo.getInstance((comp >> 24) & 0xFF, (comp >> 16) & 0xFF, (comp >> 8) & 0xFF, comp & 0xFF);
+	}
+
+	/**
+	 * Builds a VersionInfo from a four-element byte array holding major, minor,
+	 * milli and micro in that order.
+	 *
+	 * @param version the version bytes
+	 * @return the corresponding VersionInfo, or null when the array does not have
+	 *         exactly four elements
+	 */
+	private static VersionInfo getVersionInfo(byte[] version) {
+		if (version.length == 4) {
+			// widen each byte to int before the call, as the explicit casts did
+			final int major = version[0];
+			final int minor = version[1];
+			final int milli = version[2];
+			final int micro = version[3];
+			return VersionInfo.getInstance(major, minor, milli, micro);
+		}
+		return null;
+	}
+
+	/**
+	 * Creates a StringPrep object by reading a binary profile from the input
+	 * stream. The object does not keep a reference to the input stream. On
+	 * successful construction the buffered wrapper created around
+	 * <code>inputStream</code> is closed before the constructor returns; when an
+	 * exception is thrown that close call is not reached.
+	 *
+	 * @param inputStream The stream for reading the StringPrep profile binary
+	 * @throws IOException propagated from reading the profile, or thrown when the
+	 *                     version check at the end of the constructor rejects the
+	 *                     profile
+	 * @draft ICU 2.8
+	 */
+	public StringPrep(InputStream inputStream) throws IOException {
+
+		BufferedInputStream b = new BufferedInputStream(inputStream, DATA_BUFFER_SIZE);
+
+		StringPrepDataReader reader = new StringPrepDataReader(b);
+
+		// read the indexes
+		indexes = reader.readIndexes(INDEX_TOP);
+
+		byte[] sprepBytes = new byte[indexes[INDEX_TRIE_SIZE]];
+
+		// indexes[INDEX_MAPPING_DATA_SIZE] stores the size of mappingData in bytes;
+		// mappingData holds chars, hence the division by 2
+		mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE] / 2];
+		// load the rest of the data and initialize the data members
+		reader.read(sprepBytes, mappingData);
+
+		// the trie is parsed from the bytes just read; the impl object doubles as
+		// the trie's DataManipulate callback
+		sprepTrieImpl = new StringPrepTrieImpl();
+		sprepTrieImpl.sprepTrie = new CharTrie(new ByteArrayInputStream(sprepBytes), sprepTrieImpl);
+
+		// get the data format version
+		formatVersion = reader.getDataFormatVersion();
+
+		// get the options
+		doNFKC = ((indexes[OPTIONS] & NORMALIZATION_ON) > 0);
+		checkBiDi = ((indexes[OPTIONS] & CHECK_BIDI_ON) > 0);
+		sprepUniVer = getVersionInfo(reader.getUnicodeVersion());
+		normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);
+		VersionInfo normUniVer = UCharacter.getUnicodeVersion();
+		// reject the profile only when all three conditions below hold
+		if (normUniVer.compareTo(sprepUniVer) < 0 && /*
+														 * the Unicode version of SPREP file must be less than the
+														 * Unicode Version of the normalization data
+														 */
+				normUniVer.compareTo(normCorrVer) < 0
+				&& /*
+					 * the Unicode version of the NormalizationCorrections.txt file should be less
+					 * than the Unicode Version of the normalization data
+					 */
+				((indexes[OPTIONS] & NORMALIZATION_ON) > 0) /* normalization turned on */
+		) {
+			throw new IOException("Normalization Correction version not supported");
+		}
+		b.close();
+	}
+
+	/** Mutable holder for the fields decoded from a single trie word. */
+	private static final class Values {
+		boolean isIndex;
+		int value;
+		int type;
+
+		/** Restores the blank state: type -1, value 0, not an index. */
+		public void reset() {
+			type = -1;
+			value = 0;
+			isIndex = false;
+		}
+	}
+
+	/**
+	 * Decodes a trie word into <code>values</code>. The holder is reset first,
+	 * then filled according to the word:
+	 * <ul>
+	 * <li>0: type TYPE_LIMIT, so the source code point is copied to the
+	 * destination;</li>
+	 * <li>at or above TYPE_THRESHOLD: the type is the distance from
+	 * TYPE_THRESHOLD;</li>
+	 * <li>upper 14 bits equal to MAX_INDEX_VALUE: type DELETE;</li>
+	 * <li>anything else: type MAP, where bit 1 says whether the upper bits are an
+	 * index (unsigned) or a delta (signed).</li>
+	 * </ul>
+	 *
+	 * @param trieWord the word read from the trie
+	 * @param values   the holder that receives the decoded fields
+	 */
+	private static final void getValues(char trieWord, Values values) {
+		values.reset();
+
+		if (trieWord == 0) {
+			// initial value stored in the mapping table
+			values.type = TYPE_LIMIT;
+			return;
+		}
+		if (trieWord >= TYPE_THRESHOLD) {
+			values.type = trieWord - TYPE_THRESHOLD;
+			return;
+		}
+		if ((trieWord >> 2) == MAX_INDEX_VALUE) {
+			// the reserved index value marks a code point to be deleted
+			values.type = DELETE;
+			values.isIndex = false;
+			values.value = 0;
+			return;
+		}
+
+		values.type = MAP;
+		if ((trieWord & 0x02) != 0) {
+			// index: drop the two flag bits, keep the value unsigned
+			values.isIndex = true;
+			values.value = trieWord >> 2;
+		} else {
+			// delta: sign-extend the 16-bit word before dropping the two flag bits
+			values.isIndex = false;
+			values.value = ((short) trieWord) >> 2;
+		}
+	}
+
+	/**
+	 * Performs the mapping step: walks the input code point by code point and
+	 * builds the mapped output. A code point is replaced by its entry from the
+	 * mapping data, shifted by its delta, dropped, or copied unchanged, depending
+	 * on what the trie holds for it.
+	 *
+	 * @param iter    iterator over the source text
+	 * @param options bit set of options; only ALLOW_UNASSIGNED is examined
+	 * @return a new buffer holding the mapped text
+	 * @throws ParseException if an unassigned code point is met while
+	 *                        ALLOW_UNASSIGNED is not set
+	 */
+	private StringBuffer map(UCharacterIterator iter, int options) throws ParseException {
+
+		final boolean permitUnassigned = (options & ALLOW_UNASSIGNED) != 0;
+		final Values info = new Values();
+		final StringBuffer mapped = new StringBuffer();
+
+		for (int cp = iter.nextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.nextCodePoint()) {
+
+			getValues(getCodePointValue(cp), info);
+
+			// unassigned code points are only tolerated on request
+			if (info.type == UNASSIGNED && !permitUnassigned) {
+				throw new ParseException("An unassigned code point was found in the input " + iter.getText(),
+						iter.getIndex());
+			}
+
+			if (info.type == DELETE) {
+				// consume the code point without producing output
+				continue;
+			}
+
+			if (info.type == MAP) {
+				if (info.isIndex) {
+					int start = info.value;
+					int count;
+					// the index range an entry falls into tells how many chars it maps to
+					if (start >= indexes[ONE_UCHAR_MAPPING_INDEX_START]
+							&& start < indexes[TWO_UCHARS_MAPPING_INDEX_START]) {
+						count = 1;
+					} else if (start >= indexes[TWO_UCHARS_MAPPING_INDEX_START]
+							&& start < indexes[THREE_UCHARS_MAPPING_INDEX_START]) {
+						count = 2;
+					} else if (start >= indexes[THREE_UCHARS_MAPPING_INDEX_START]
+							&& start < indexes[FOUR_UCHARS_MAPPING_INDEX_START]) {
+						count = 3;
+					} else {
+						// otherwise the length is stored in front of the mapping itself
+						count = mappingData[start++];
+					}
+					mapped.append(mappingData, start, count);
+					continue;
+				}
+				// delta mapping: shift the code point, then copy it below
+				cp -= info.value;
+			}
+
+			UTF16.append(mapped, cp);
+		}
+
+		return mapped;
+	}
+
+	/**
+	 * Applies NFKC normalization, with the UNICODE_3_2 option, to the given text.
+	 *
+	 * @param src the text to normalize
+	 * @return a new buffer holding the normalized text
+	 */
+	private StringBuffer normalize(StringBuffer src) {
+		/*
+		 * Option UNORM_BEFORE_PRI_29:
+		 *
+		 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
+		 * requires strict adherence to Unicode 3.2 normalization, including buggy
+		 * composition from before fixing Public Review Issue #29. Note that this
+		 * results in some valid but nonsensical text to be either corrupted or
+		 * rejected, depending on the text. See
+		 * http://www.unicode.org/review/resolved-pri.html#pri29 See unorm.cpp and
+		 * cnormtst.c
+		 */
+		final String text = src.toString();
+		return new StringBuffer(SunNormalizer.normalize(text, Normalizer.Form.NFKC, SunNormalizer.UNICODE_3_2));
+	}
+
+	/*
+	 * boolean isLabelSeparator(int ch){ int result = getCodePointValue(ch); if(
+	 * (result & 0x07) == LABEL_SEPARATOR){ return true; } return false; }
+	 */
+	/*
+	 * 1) Map -- For each character in the input, check if it has a mapping and, if
+	 * so, replace it with its mapping.
+	 * 
+	 * 2) Normalize -- Possibly normalize the result of step 1 using Unicode
+	 * normalization.
+	 * 
+	 * 3) Prohibit -- Check for any characters that are not allowed in the output.
+	 * If any are found, return an error.
+	 * 
+	 * 4) Check bidi -- Possibly check for right-to-left characters, and if any are
+	 * found, make sure that the whole string satisfies the requirements for
+	 * bidirectional strings. If the string does not satisfy the requirements for
+	 * bidirectional strings, return an error. [Unicode3.2] defines several
+	 * bidirectional categories; each character has one bidirectional category
+	 * assigned to it. For the purposes of the requirements below, an
+	 * "RandALCat character" is a character that has Unicode bidirectional
+	 * categories "R" or "AL"; an "LCat character" is a character that has Unicode
+	 * bidirectional category "L". Note
+	 * 
+	 * 
+	 * that there are many characters which fall in neither of the above
+	 * definitions; Latin digits (<U+0030> through <U+0039>) are examples of this
+	 * because they have bidirectional category "EN".
+	 * 
+	 * In any profile that specifies bidirectional character handling, all three of
+	 * the following requirements MUST be met:
+	 * 
+	 * 1) The characters in section 5.8 MUST be prohibited.
+	 * 
+	 * 2) If a string contains any RandALCat character, the string MUST NOT contain
+	 * any LCat character.
+	 * 
+	 * 3) If a string contains any RandALCat character, a RandALCat character MUST
+	 * be the first character of the string, and a RandALCat character MUST be the
+	 * last character of the string.
+	 */
+	/**
+	 * Prepare the input buffer for use in applications with the given profile. This
+	 * operation maps, normalizes (NFKC), and checks for prohibited and BiDi
+	 * characters in the order defined by RFC 3454, depending on the options
+	 * specified in the profile.
+	 *
+	 * @param src     A UCharacterIterator object containing the source string
+	 * @param options A bit set of options:
+	 *
+	 *                - StringPrep.DEFAULT Prohibit processing of unassigned code
+	 *                points in the input
+	 *
+	 *                - StringPrep.ALLOW_UNASSIGNED Treat the unassigned code points
+	 *                in the input as normal Unicode code points.
+	 *
+	 * @return StringBuffer A StringBuffer containing the output
+	 * @throws ParseException if the input contains an unassigned code point that
+	 *                        the options do not allow, a prohibited code point, or
+	 *                        (when the profile checks BiDi) violates the BiDi
+	 *                        rules. For a prohibited code point the error offset is
+	 *                        the index of that code point in the mapped and
+	 *                        normalized text, which is also the text appended to
+	 *                        the message.
+	 * @draft ICU 2.8
+	 */
+	public StringBuffer prepare(UCharacterIterator src, int options) throws ParseException {
+
+		// map, then normalize only when the profile turned NFKC on
+		StringBuffer mapOut = map(src, options);
+		StringBuffer normOut = doNFKC ? normalize(mapOut) : mapOut;
+
+		int ch;
+		UCharacterIterator iter = UCharacterIterator.getInstance(normOut);
+		Values val = new Values();
+		// CHAR_DIRECTION_COUNT serves as the "no character seen yet" marker
+		int direction = UCharacterDirection.CHAR_DIRECTION_COUNT,
+				firstCharDir = UCharacterDirection.CHAR_DIRECTION_COUNT;
+		int rtlPos = -1, ltrPos = -1;
+		boolean rightToLeft = false, leftToRight = false;
+
+		while ((ch = iter.nextCodePoint()) != UCharacterIterator.DONE) {
+			getValues(getCodePointValue(ch), val);
+
+			if (val.type == PROHIBITED) {
+				// The iterator already sits behind the offending code point, which
+				// occupies two code units when it is supplementary. val.value is
+				// always 0 for this type, so it cannot serve as the error offset.
+				throw new ParseException("A prohibited code point was found in the input " + iter.getText(),
+						iter.getIndex() - (ch > 0xFFFF ? 2 : 1));
+			}
+
+			// after the loop, direction holds the direction of the last character
+			direction = UCharacter.getDirection(ch);
+			if (firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT) {
+				firstCharDir = direction;
+			}
+			if (direction == UCharacterDirection.LEFT_TO_RIGHT) {
+				leftToRight = true;
+				ltrPos = iter.getIndex() - 1;
+			}
+			if (direction == UCharacterDirection.RIGHT_TO_LEFT
+					|| direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
+				rightToLeft = true;
+				rtlPos = iter.getIndex() - 1;
+			}
+		}
+		if (checkBiDi) {
+			// satisfy 2: RandALCat and LCat characters must not be mixed
+			if (leftToRight && rightToLeft) {
+				throw new ParseException(
+						"The input does not conform to the rules for BiDi code points." + iter.getText(),
+						Math.max(rtlPos, ltrPos));
+			}
+
+			// satisfy 3: with any RandALCat character present, both the first and
+			// the last character must be RandALCat
+			boolean firstIsRtl = firstCharDir == UCharacterDirection.RIGHT_TO_LEFT
+					|| firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
+			boolean lastIsRtl = direction == UCharacterDirection.RIGHT_TO_LEFT
+					|| direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC;
+			if (rightToLeft && !(firstIsRtl && lastIsRtl)) {
+				throw new ParseException(
+						"The input does not conform to the rules for BiDi code points." + iter.getText(),
+						Math.max(rtlPos, ltrPos));
+			}
+		}
+		return normOut;
+
+	}
+}
--- a/sources/main/java/jdk_internal/icu/text/UCharacterIterator.java
+++ b/sources/main/java/jdk_internal/icu/text/UCharacterIterator.java
@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+package jdk_internal.icu.text;
+
+import jdk_internal.bidi.CharacterIterator;
+import jdk_internal.icu.impl.CharacterIteratorWrapper;
+import jdk_internal.icu.impl.ReplaceableUCharacterIterator;
+import jdk_internal.icu.impl.UCharacterProperty;
+
+/**
+ * Abstract class that defines an API for iteration on text objects.This is an
+ * interface for forward and backward iteration and random access into a text
+ * object. Forward iteration is done with post-increment and backward iteration
+ * is done with pre-decrement semantics, while the
+ * <code>java.text.CharacterIterator</code> interface methods provided forward
+ * iteration with "pre-increment" and backward iteration with pre-decrement
+ * semantics. This API is more efficient for forward iteration over code points.
+ * The other major difference is that this API can do both code unit and code
+ * point iteration, <code>java.text.CharacterIterator</code> can only iterate
+ * over code units and is limited to BMP (0 - 0xFFFF)
+ * 
+ * @author Ram
+ * @stable ICU 2.4
+ */
+public abstract class UCharacterIterator implements Cloneable {
+
+	/**
+	 * Default constructor, reserved for subclasses.
+	 * 
+	 * @stable ICU 2.4
+	 */
+	protected UCharacterIterator() {
+	}
+
+	/**
+	 * Value returned by the iteration methods once an end of the UTF16 text has
+	 * been reached. Moved from UForwardCharacterIterator.java
+	 * 
+	 * @stable ICU 2.4
+	 */
+	public static final int DONE = -1;
+
+	// static final methods ----------------------------------------------------
+
+	/**
+	 * Creates a <code>UCharacterIterator</code> over a string.
+	 * 
+	 * @param source a string
+	 * @return UCharacterIterator object
+	 * @exception IllegalArgumentException if the argument is null
+	 * @stable ICU 2.4
+	 */
+	public static final UCharacterIterator getInstance(String source) {
+		final UCharacterIterator result = new ReplaceableUCharacterIterator(source);
+		return result;
+	}
+
+	/**
+	 * Creates a <code>UCharacterIterator</code> over a StringBuffer.
+	 * 
+	 * @param source a string buffer of UTF-16 code units
+	 * @return UCharacterIterator object
+	 * @exception IllegalArgumentException if the argument is null
+	 * @stable ICU 2.4
+	 */
+	public static final UCharacterIterator getInstance(StringBuffer source) {
+		final UCharacterIterator result = new ReplaceableUCharacterIterator(source);
+		return result;
+	}
+
+	/**
+	 * Creates a <code>UCharacterIterator</code> that wraps a CharacterIterator.
+	 * 
+	 * @param source a valid CharacterIterator object.
+	 * @return UCharacterIterator object
+	 * @exception IllegalArgumentException if the argument is null
+	 * @stable ICU 2.4
+	 */
+	public static final UCharacterIterator getInstance(CharacterIterator source) {
+		final UCharacterIterator result = new CharacterIteratorWrapper(source);
+		return result;
+	}
+
+	// public methods ----------------------------------------------------------
+
+	/**
+	 * Returns the length of the text
+	 * 
+	 * @return length of the text
+	 * @stable ICU 2.4
+	 */
+	public abstract int getLength();
+
+	/**
+	 * Gets the current index in text.
+	 * 
+	 * @return current index in text.
+	 * @stable ICU 2.4
+	 */
+	public abstract int getIndex();
+
+	/**
+	 * Returns the UTF16 code unit at index, and increments to the next code unit
+	 * (post-increment semantics). If index is out of range, DONE is returned, and
+	 * the iterator is reset to the limit of the text.
+	 * 
+	 * @return the next UTF16 code unit, or DONE if the index is at the limit of the
+	 *         text.
+	 * @stable ICU 2.4
+	 */
+	public abstract int next();
+
+	/**
+	 * Returns the code point at index, and increments to the next code point
+	 * (post-increment semantics). When the code unit at index is a lead surrogate
+	 * that is directly followed by a trail surrogate, the iterator moves past both
+	 * and the code point they encode is returned. In every other case the result
+	 * is the same as <code>next()</code>.
+	 * 
+	 * @return the next codepoint in text, or DONE if the index is at the limit of
+	 *         the text.
+	 * @stable ICU 2.4
+	 */
+	public int nextCodePoint() {
+		final int lead = next();
+		if (!UTF16.isLeadSurrogate((char) lead)) {
+			return lead;
+		}
+		final int trail = next();
+		if (UTF16.isTrailSurrogate((char) trail)) {
+			return UCharacterProperty.getRawSupplementary((char) lead, (char) trail);
+		}
+		if (trail != DONE) {
+			// unmatched lead surrogate: un-read the unit that followed it
+			previous();
+		}
+		return lead;
+	}
+
+	/**
+	 * Decrement to the position of the previous code unit in the text, and return
+	 * it (pre-decrement semantics). If the resulting index is less than 0, the
+	 * index is reset to 0 and DONE is returned.
+	 * 
+	 * @return the previous code unit in the text, or DONE if the new index is
+	 *         before the start of the text.
+	 * @stable ICU 2.4
+	 */
+	public abstract int previous();
+
+	/**
+	 * Retreat to the start of the previous code point in the text, and return it
+	 * (pre-decrement semantics). When the code unit before index is a trail
+	 * surrogate that is directly preceded by a lead surrogate, the iterator moves
+	 * to the start of the pair and the code point they encode is returned. In
+	 * every other case the result is the same as <code>previous()</code>.
+	 * 
+	 * @return the previous code point in the text, or DONE if the new index is
+	 *         before the start of the text.
+	 * @stable ICU 2.4
+	 */
+	public int previousCodePoint() {
+		final int trail = previous();
+		if (!UTF16.isTrailSurrogate((char) trail)) {
+			return trail;
+		}
+		final int lead = previous();
+		if (UTF16.isLeadSurrogate((char) lead)) {
+			return UCharacterProperty.getRawSupplementary((char) lead, (char) trail);
+		}
+		if (lead != DONE) {
+			// unmatched trail surrogate: step forward over the unit before it again
+			next();
+		}
+		return trail;
+	}
+
+	/**
+	 * Sets the index to the specified index in the text.
+	 * 
+	 * @param index the index within the text.
+	 * @exception IndexOutOfBoundsException is thrown if an invalid index is
+	 *                                      supplied
+	 * @stable ICU 2.4
+	 */
+	public abstract void setIndex(int index);
+
+	/**
+	 * Moves the iterator back to the start of the text (index 0).
+	 * 
+	 * @stable ICU 2.4
+	 */
+	public void setToStart() {
+		setIndex(0);
+	}
+
+	/**
+	 * Fills the buffer with the underlying text storage of the iterator. If the
+	 * buffer capacity is not enough an exception is thrown. The capacity of the
+	 * fill-in buffer should be at least equal to the length of the text in the
+	 * iterator, obtained by calling <code>getLength()</code>. <b>Usage:</b>
+	 *
+	 * <pre>{@code
+	 *         UCharacterIterator iter = UCharacterIterator.getInstance(text);
+	 *         char[] buf = new char[iter.getLength()];
+	 *         iter.getText(buf);
+	 *
+	 *         OR
+	 *         char[] buf= new char[1];
+	 *         int len = 0;
+	 *         for(;;){
+	 *             try{
+	 *                 len = iter.getText(buf);
+	 *                 break;
+	 *             }catch(IndexOutOfBoundsException e){
+	 *                 buf = new char[iter.getLength()];
+	 *             }
+	 *         }
+	 * }</pre>
+	 *
+	 * @param fillIn an array of chars to fill with the underlying UTF-16 code
+	 *               units.
+	 * @param offset the position within the array to start putting the data.
+	 * @return the number of code units added to fillIn, as a convenience
+	 * @exception IndexOutOfBoundsException exception if there is not enough room
+	 *                                      after offset in the array, or if offset
+	 *                                      is negative.
+	 * @stable ICU 2.4
+	 */
+	public abstract int getText(char[] fillIn, int offset);
+
+	/**
+	 * Convenience overload of <code>getText(char[], int)</code> that writes from
+	 * offset 0.
+	 * 
+	 * @param fillIn an array of chars to fill with the underlying UTF-16 code
+	 *               units.
+	 * @return the number of code units added to fillIn, as a convenience
+	 * @exception IndexOutOfBoundsException exception if there is not enough room in
+	 *                                      the array.
+	 * @stable ICU 2.4
+	 */
+	public final int getText(char[] fillIn) {
+		return getText(fillIn, 0);
+	}
+
+	/**
+	 * Convenience method that returns the underlying text storage as a string. A
+	 * char array of <code>getLength()</code> elements is filled and converted.
+	 * 
+	 * @return the underlying text storage in the iterator as a string
+	 * @stable ICU 2.4
+	 */
+	public String getText() {
+		final char[] units = new char[getLength()];
+		getText(units, 0);
+		return String.valueOf(units);
+	}
+
+	/**
+	 * Moves the current position by the number of code points specified, either
+	 * forward or backward depending on the sign of delta (positive or negative
+	 * respectively). Each step is one call to <code>nextCodePoint()</code> or
+	 * <code>previousCodePoint()</code>, so an unpaired surrogate counts as one
+	 * step. If an end of the text is reached before <code>delta</code> code points
+	 * have been traversed, an IndexOutOfBoundsException is thrown.
+	 * 
+	 * @param delta the number of code points to move the current index.
+	 * @return the new index
+	 * @exception IndexOutOfBoundsException is thrown if the text ends before the
+	 *                                      requested number of code points has
+	 *                                      been traversed
+	 * @stable ICU 2.4
+	 *
+	 */
+	public int moveCodePointIndex(int delta) {
+		int remaining = delta;
+		// at most one of the two loops runs, selected by the sign of delta
+		while (remaining > 0 && nextCodePoint() != DONE) {
+			remaining--;
+		}
+		while (remaining < 0 && previousCodePoint() != DONE) {
+			remaining++;
+		}
+		if (remaining != 0) {
+			throw new IndexOutOfBoundsException();
+		}
+
+		return getIndex();
+	}
+
+	/**
+	 * Creates a copy of this iterator by delegating to <code>super.clone()</code>.
+	 * 
+	 * @return copy of this iterator
+	 * @throws CloneNotSupportedException if <code>super.clone()</code> throws it
+	 * @stable ICU 2.4
+	 */
+	public Object clone() throws CloneNotSupportedException {
+		return super.clone();
+	}
+
+}
--- a/sources/main/java/jdk_internal/icu/text/UTF16.java
+++ b/sources/main/java/jdk_internal/icu/text/UTF16.java
@ -0,0 +1,609 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/**
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+package jdk_internal.icu.text;
+
+import jdk_internal.icu.impl.UCharacterProperty;
+
+/**
+ * <p>
+ * Standalone utility class providing UTF16 character conversions and indexing
+ * conversions.
+ * <p>
+ * Code that uses strings alone rarely needs modification. By design, UTF-16 does
+ * not allow overlap, so searching for strings is a safe operation. Similarly,
+ * concatenation is always safe. Substringing is safe if the start and end are
+ * both on UTF-32 boundaries. In normal code, the values for start and end are
+ * on those boundaries, since they arose from operations like searching. If not,
+ * the nearest UTF-32 boundaries can be determined using <code>bounds()</code>.
+ * <strong>Examples:</strong>
+ * <p>
+ * The following examples illustrate use of some of these methods.
+ * 
+ * <pre>{@code
+ * // iteration forwards: Original
+ * for (int i = 0; i < s.length(); ++i) {
+ * 	char ch = s.charAt(i);
+ * 	doSomethingWith(ch);
+ * }
+ *
+ * // iteration forwards: Changes for UTF-32
+ * int ch;
+ * for (int i = 0; i < s.length(); i += UTF16.getCharCount(ch)) {
+ * 	ch = UTF16.charAt(s, i);
+ * 	doSomethingWith(ch);
+ * }
+ *
+ * // iteration backwards: Original
+ * for (int i = s.length() - 1; i >= 0; --i) {
+ * 	char ch = s.charAt(i);
+ * 	doSomethingWith(ch);
+ * }
+ *
+ * // iteration backwards: Changes for UTF-32
+ * int ch;
+ * for (int i = s.length() - 1; i >= 0; i -= UTF16.getCharCount(ch)) {
+ * 	ch = UTF16.charAt(s, i);
+ * 	doSomethingWith(ch);
+ * }
+ * }</pre>
+ * 
+ * <strong>Notes:</strong>
+ * <ul>
+ * <li><strong>Naming:</strong> For clarity, High and Low surrogates are called
+ * <code>Lead</code> and <code>Trail</code> in the API, which gives a better
+ * sense of their ordering in a string. <code>offset16</code> and
+ * <code>offset32</code> are used to distinguish offsets to UTF-16 boundaries vs
+ * offsets to UTF-32 boundaries. <code>int char32</code> is used to contain
+ * UTF-32 characters, as opposed to <code>char16</code>, which is a UTF-16 code
+ * unit.</li>
+ * <li><strong>Roundtripping Offsets:</strong> You can always roundtrip from a
+ * UTF-32 offset to a UTF-16 offset and back. Because of the difference in
+ * structure, you can roundtrip from a UTF-16 offset to a UTF-32 offset and back
+ * if and only if <code>bounds(string, offset16) != TRAIL</code>.</li>
+ * <li><strong>Exceptions:</strong> The error checking will throw an exception
+ * if indices are out of bounds. Other than that, all methods will behave
+ * reasonably, even if unmatched surrogates or out-of-bounds UTF-32 values are
+ * present. <code>UCharacter.isLegal()</code> can be used to check for validity
+ * if desired.</li>
+ * <li><strong>Unmatched Surrogates:</strong> If the string contains unmatched
+ * surrogates, then these are counted as one UTF-32 value. This matches their
+ * iteration behavior, which is vital. It also matches common display practice
+ * as missing glyphs (see the Unicode Standard Section 5.4, 5.5).</li>
+ * <li><strong>Optimization:</strong> The method implementations may need
+ * optimization if the compiler doesn't fold static final methods. Since
+ * surrogate pairs will form an exceedingly small percentage of all the text in
+ * the world, the singleton case should always be optimized for.</li>
+ * </ul>
+ * 
+ * @author Mark Davis, with help from Markus Scherer
+ * @stable ICU 2.1
+ */
+
+public final class UTF16 {
+	// public variables ---------------------------------------------------
+
+	/**
+	 * The lowest Unicode code point value (U+0000).
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int CODEPOINT_MIN_VALUE = 0;
+	/**
+	 * The highest Unicode code point value (scalar value) according to the Unicode
+	 * Standard (U+10FFFF).
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int CODEPOINT_MAX_VALUE = 0x10ffff;
+	/**
+	 * The minimum value for supplementary code points (U+10000). Code points at or
+	 * above this value are the ones this class encodes as two UTF-16 code units.
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;
+	/**
+	 * Lead surrogate minimum value (0xD800).
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;
+	/**
+	 * Trail surrogate minimum value (0xDC00).
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
+	/**
+	 * Lead surrogate maximum value (0xDBFF).
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;
+	/**
+	 * Trail surrogate maximum value (0xDFFF).
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;
+	/**
+	 * Surrogate minimum value; the same value as {@link #LEAD_SURROGATE_MIN_VALUE}.
+	 * 
+	 * @stable ICU 2.1
+	 */
+	public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;
+	/**
+	 * Lead surrogate bitmask: clears the low 10 bits of a code unit so the rest
+	 * can be compared with {@link #LEAD_SURROGATE_BITS} (see isLeadSurrogate).
+	 */
+	private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;
+	/**
+	 * Trail surrogate bitmask: clears the low 10 bits of a code unit so the rest
+	 * can be compared with {@link #TRAIL_SURROGATE_BITS} (see isTrailSurrogate).
+	 */
+	private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;
+	/**
+	 * Surrogate bitmask: clears the low 11 bits of a code unit so the rest can be
+	 * compared with {@link #SURROGATE_BITS} (see isSurrogate). This covers lead
+	 * and trail surrogates together, i.e. 0xD800 through 0xDFFF.
+	 */
+	private static final int SURROGATE_BITMASK = 0xFFFFF800;
+	/**
+	 * Lead surrogate bits: the masked value shared by 0xD800 through 0xDBFF.
+	 */
+	private static final int LEAD_SURROGATE_BITS = 0xD800;
+	/**
+	 * Trail surrogate bits: the masked value shared by 0xDC00 through 0xDFFF.
+	 */
+	private static final int TRAIL_SURROGATE_BITS = 0xDC00;
+	/**
+	 * Surrogate bits: the masked value shared by 0xD800 through 0xDFFF.
+	 */
+	private static final int SURROGATE_BITS = 0xD800;
+
+	// constructor --------------------------------------------------------
+
+	// /CLOVER:OFF
+	/**
+	 * Prevent instance from being created.
+	 */
+	private UTF16() {
+		// Intentionally empty: the constructor is private so that code outside this
+		// class cannot create an instance.
+	}
+
+	// /CLOVER:ON
+	// public method ------------------------------------------------------
+
+	/**
+	 * Extract a single UTF-32 value from a string. Used when iterating forwards or
+	 * backwards (with <code>UTF16.getCharCount()</code>), as well as random access.
+	 * If a validity check is required, use
+	 * <code><a href="../lang/UCharacter.html#isLegal(char)">
+	 * UCharacter.isLegal()</a></code> on the return value. If the char retrieved is
+	 * part of a surrogate pair, its supplementary character will be returned. If a
+	 * complete supplementary character is not found the incomplete character will
+	 * be returned
+	 * 
+	 * @param source   string of UTF-16 code units
+	 * @param offset16 UTF-16 offset to the start of the character.
+	 * @return UTF-32 value for the UTF-32 value that contains the char at offset16.
+	 *         The boundaries of that codepoint are the same as in
+	 *         <code>bounds32()</code>.
+	 * @exception IndexOutOfBoundsException thrown if offset16 is out of bounds.
+	 * @stable ICU 2.1
+	 */
+	public static int charAt(String source, int offset16) {
+		final char unit = source.charAt(offset16);
+		// Fast path: anything below the surrogate range is returned as is; only
+		// values from LEAD_SURROGATE_MIN_VALUE upwards go to the helper.
+		return unit < LEAD_SURROGATE_MIN_VALUE ? unit : _charAt(source, offset16, unit);
+	}
+
+	private static int _charAt(String source, int offset16, char single) {
+		if (single > TRAIL_SURROGATE_MAX_VALUE) {
+			return single;
+		}
+
+		// Convert the UTF-16 surrogate pair if necessary.
+		// For simplicity in usage, and because the frequency of pairs is
+		// low, look both directions.
+
+		if (single <= LEAD_SURROGATE_MAX_VALUE) {
+			++offset16;
+			if (source.length() != offset16) {
+				char trail = source.charAt(offset16);
+				if (trail >= TRAIL_SURROGATE_MIN_VALUE && trail <= TRAIL_SURROGATE_MAX_VALUE) {
+					return UCharacterProperty.getRawSupplementary(single, trail);
+				}
+			}
+		} else {
+			--offset16;
+			if (offset16 >= 0) {
+				// single is a trail surrogate so
+				char lead = source.charAt(offset16);
+				if (lead >= LEAD_SURROGATE_MIN_VALUE && lead <= LEAD_SURROGATE_MAX_VALUE) {
+					return UCharacterProperty.getRawSupplementary(lead, single);
+				}
+			}
+		}
+		return single; // return unmatched surrogate
+	}
+
+	/**
+	 * Extract a single UTF-32 value from a string. Used when iterating forwards or
+	 * backwards (with <code>UTF16.getCharCount()</code>), as well as random access.
+	 * If a validity check is required, use
+	 * <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
+	 * </a></code> on the return value. If the char retrieved is part of a surrogate
+	 * pair, its supplementary character will be returned. If a complete
+	 * supplementary character is not found the incomplete character will be
+	 * returned
+	 * 
+	 * @param source   sequence of UTF-16 code units
+	 * @param offset16 UTF-16 offset to the start of the character.
+	 * @return UTF-32 value for the UTF-32 value that contains the char at offset16.
+	 *         The boundaries of that codepoint are the same as in
+	 *         <code>bounds32()</code>.
+	 * @exception IndexOutOfBoundsException thrown if offset16 is out of bounds.
+	 * @stable ICU 2.1
+	 */
+	public static int charAt(CharSequence source, int offset16) {
+		final char unit = source.charAt(offset16);
+		if (unit >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
+			// In or above the surrogate range: let the helper decide whether a
+			// neighbouring code unit completes a pair.
+			return _charAt(source, offset16, unit);
+		}
+		// Below the surrogate range: the code unit is returned unchanged.
+		return unit;
+	}
+
+	private static int _charAt(CharSequence source, int offset16, char single) {
+		if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+			return single;
+		}
+
+		// Convert the UTF-16 surrogate pair if necessary.
+		// For simplicity in usage, and because the frequency of pairs is
+		// low, look both directions.
+
+		if (single <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+			++offset16;
+			if (source.length() != offset16) {
+				char trail = source.charAt(offset16);
+				if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+					return UCharacterProperty.getRawSupplementary(single, trail);
+				}
+			}
+		} else {
+			--offset16;
+			if (offset16 >= 0) {
+				// single is a trail surrogate so
+				char lead = source.charAt(offset16);
+				if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+					return UCharacterProperty.getRawSupplementary(lead, single);
+				}
+			}
+		}
+		return single; // return unmatched surrogate
+	}
+
+	/**
+	 * Extract a single UTF-32 value from a substring. Used when iterating forwards
+	 * or backwards (with <code>UTF16.getCharCount()</code>), as well as random
+	 * access. If a validity check is required, use
+	 * <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
+	 * </a></code> on the return value. If the char retrieved is part of a surrogate
+	 * pair, its supplementary character will be returned. If a complete
+	 * supplementary character is not found the incomplete character will be
+	 * returned
+	 *
+	 * @param source   Array of UTF-16 chars
+	 * @param start    Offset to substring in the source array for analyzing
+	 * @param limit    Offset to substring in the source array for analyzing
+	 * @param offset16 UTF-16 offset relative to start
+	 * @return UTF-32 value for the UTF-32 value that contains the char at offset16.
+	 *         The boundaries of that codepoint are the same as in
+	 *         <code>bounds32()</code>.
+	 * @exception IndexOutOfBoundsException Thrown if offset16 is not within the
+	 *                                      range of start and limit.
+	 * @stable ICU 2.1
+	 */
+	public static int charAt(char source[], int start, int limit, int offset16) {
+		// offset16 is relative to start; work with the absolute index from here on.
+		final int index = offset16 + start;
+		if (index < start || index >= limit) {
+			throw new ArrayIndexOutOfBoundsException(index);
+		}
+
+		final char single = source[index];
+		if (!isSurrogate(single)) {
+			return single;
+		}
+
+		if (single <= LEAD_SURROGATE_MAX_VALUE) {
+			// Lead surrogate: look one code unit ahead, but never at or past limit.
+			final int next = index + 1;
+			if (next < limit) {
+				final char trail = source[next];
+				if (isTrailSurrogate(trail)) {
+					return UCharacterProperty.getRawSupplementary(single, trail);
+				}
+			}
+			return single; // no trail surrogate follows inside the subarray
+		}
+
+		// Trail surrogate: look one code unit back, but never before start.
+		if (index != start) {
+			final char lead = source[index - 1];
+			if (isLeadSurrogate(lead)) {
+				return UCharacterProperty.getRawSupplementary(lead, single);
+			}
+		}
+		return single; // no lead surrogate precedes inside the subarray
+	}
+
+	/**
+	 * Determines how many chars this char32 requires. If a validity check is
+	 * required, use <code>
+	 * <a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on
+	 * char32 before calling.
+	 * 
+	 * @param char32 the input codepoint.
+	 * @return 2 if is in supplementary space, otherwise 1.
+	 * @stable ICU 2.1
+	 */
+	public static int getCharCount(int char32) {
+		// Values below SUPPLEMENTARY_MIN_VALUE need one code unit, all others two.
+		return char32 < SUPPLEMENTARY_MIN_VALUE ? 1 : 2;
+	}
+
+	/**
+	 * Determines whether the code value is a surrogate.
+	 * 
+	 * @param char16 the input character.
+	 * @return true if the input character is a surrogate.
+	 * @stable ICU 2.1
+	 */
+	public static boolean isSurrogate(char char16) {
+		// Drop the bits the mask clears and compare what is left with the pattern
+		// shared by every surrogate code unit.
+		final int highBits = char16 & SURROGATE_BITMASK;
+		return highBits == SURROGATE_BITS;
+	}
+
+	/**
+	 * Determines whether the character is a trail surrogate.
+	 * 
+	 * @param char16 the input character.
+	 * @return true if the input character is a trail surrogate.
+	 * @stable ICU 2.1
+	 */
+	public static boolean isTrailSurrogate(char char16) {
+		// Drop the bits the mask clears and compare what is left with the
+		// trail-surrogate pattern.
+		final int highBits = char16 & TRAIL_SURROGATE_BITMASK;
+		return highBits == TRAIL_SURROGATE_BITS;
+	}
+
+	/**
+	 * Determines whether the character is a lead surrogate.
+	 * 
+	 * @param char16 the input character.
+	 * @return true if the input character is a lead surrogate
+	 * @stable ICU 2.1
+	 */
+	public static boolean isLeadSurrogate(char char16) {
+		// Drop the bits the mask clears and compare what is left with the
+		// lead-surrogate pattern.
+		final int highBits = char16 & LEAD_SURROGATE_BITMASK;
+		return highBits == LEAD_SURROGATE_BITS;
+	}
+
+	/**
+	 * Returns the lead surrogate. If a validity check is required, use
+	 * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on
+	 * char32 before calling.
+	 * 
+	 * @param char32 the input character.
+	 * @return lead surrogate if the getCharCount(ch) is 2; <br>
+	 *         and 0 otherwise (note: 0 is not a valid lead surrogate).
+	 * @stable ICU 2.1
+	 */
+	public static char getLeadSurrogate(int char32) {
+		// Values below the supplementary range have no lead surrogate: return 0.
+		if (char32 < SUPPLEMENTARY_MIN_VALUE) {
+			return 0;
+		}
+		// Shift out the bits carried by the trail surrogate and add the offset.
+		final int lead = LEAD_SURROGATE_OFFSET_ + (char32 >> LEAD_SURROGATE_SHIFT_);
+		return (char) lead;
+	}
+
+	/**
+	 * Returns the trail surrogate. If a validity check is required, use
+	 * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on
+	 * char32 before calling.
+	 * 
+	 * @param char32 the input character.
+	 * @return the trail surrogate if the getCharCount(ch) is 2; <br>
+	 *         otherwise the character itself
+	 * @stable ICU 2.1
+	 */
+	public static char getTrailSurrogate(int char32) {
+		// Values below the supplementary range are returned as a plain char cast.
+		if (char32 < SUPPLEMENTARY_MIN_VALUE) {
+			return (char) char32;
+		}
+		// Keep only the bits selected by the mask and add the trail base value.
+		final int lowBits = char32 & TRAIL_SURROGATE_MASK_;
+		return (char) (TRAIL_SURROGATE_MIN_VALUE + lowBits);
+	}
+
+	/**
+	 * Convenience method corresponding to String.valueOf(char). Returns a one or
+	 * two char string containing the UTF-32 value in UTF16 format. If a validity
+	 * check is required, use
+	 * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on
+	 * char32 before calling.
+	 * 
+	 * @param char32 the input character.
+	 * @return string value of char32 in UTF16 format
+	 * @exception IllegalArgumentException thrown if char32 is an invalid codepoint.
+	 * @stable ICU 2.1
+	 */
+	public static String valueOf(int char32) {
+		// Reject anything outside [CODEPOINT_MIN_VALUE, CODEPOINT_MAX_VALUE].
+		final boolean inRange = char32 >= CODEPOINT_MIN_VALUE && char32 <= CODEPOINT_MAX_VALUE;
+		if (!inRange) {
+			throw new IllegalArgumentException("Illegal codepoint");
+		}
+		return toString(char32);
+	}
+
+	/**
+	 * Append a single UTF-32 value to the end of a StringBuffer. If a validity
+	 * check is required, use
+	 * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> on
+	 * char32 before calling.
+	 * 
+	 * @param target the buffer to append to
+	 * @param char32 value to append.
+	 * @return the updated StringBuffer
+	 * @exception IllegalArgumentException thrown when char32 does not lie within
+	 *                                     the range of the Unicode codepoints
+	 * @stable ICU 2.1
+	 */
+	public static StringBuffer append(StringBuffer target, int char32) {
+		// Reject anything outside [CODEPOINT_MIN_VALUE, CODEPOINT_MAX_VALUE].
+		final boolean inRange = CODEPOINT_MIN_VALUE <= char32 && char32 <= CODEPOINT_MAX_VALUE;
+		if (!inRange) {
+			throw new IllegalArgumentException("Illegal codepoint: " + Integer.toHexString(char32));
+		}
+
+		// Below the supplementary range: a single code unit is enough.
+		if (char32 < SUPPLEMENTARY_MIN_VALUE) {
+			target.append((char) char32);
+			return target;
+		}
+
+		// Supplementary code point: lead surrogate first, then trail surrogate.
+		target.append(getLeadSurrogate(char32)).append(getTrailSurrogate(char32));
+		return target;
+	}
+
+	/**
+	 * Shifts offset16 by the argument number of codepoints within a subarray.
+	 * 
+	 * @param source   char array
+	 * @param start    position of the subarray to be performed on
+	 * @param limit    position of the subarray to be performed on
+	 * @param offset16 UTF16 position to shift relative to start
+	 * @param shift32  number of codepoints to shift
+	 * @return new shifted offset16 relative to start
+	 * @exception IndexOutOfBoundsException if the new offset16 is out of bounds
+	 *                                      with respect to the subarray or the
+	 *                                      subarray bounds are out of range.
+	 * @stable ICU 2.1
+	 */
+	public static int moveCodePointOffset(char source[], int start, int limit, int offset16, int shift32) {
+		int size = source.length;
+		int count;
+		char ch;
+		// Absolute index into source; offset16 itself is relative to start.
+		int result = offset16 + start;
+		// Argument validation. The exception carries start, limit or offset16
+		// respectively, depending on which of the three checks fails first.
+		if (start < 0 || limit < start) {
+			throw new StringIndexOutOfBoundsException(start);
+		}
+		if (limit > size) {
+			throw new StringIndexOutOfBoundsException(limit);
+		}
+		if (offset16 < 0 || result > limit) {
+			throw new StringIndexOutOfBoundsException(offset16);
+		}
+		if (shift32 > 0) {
+			// Cheap early rejection: every code point occupies at least one code
+			// unit, so the move cannot succeed if it would overrun the array. Note
+			// that this compares with the array length, not with limit; running
+			// into limit is detected by the count check after the loop.
+			if (shift32 + result > size) {
+				throw new StringIndexOutOfBoundsException(result);
+			}
+			count = shift32;
+			// Step forward one code point per iteration, never reading at or past limit.
+			while (result < limit && count > 0) {
+				ch = source[result];
+				// A lead surrogate followed by a trail surrogate inside the subarray
+				// counts as one code point, so skip the extra code unit.
+				if (isLeadSurrogate(ch) && (result + 1 < limit) && isTrailSurrogate(source[result + 1])) {
+					result++;
+				}
+				count--;
+				result++;
+			}
+		} else {
+			// Cheap early rejection for a backward (or zero) shift, mirroring the
+			// forward case: at least one code unit is needed per code point.
+			if (result + shift32 < start) {
+				throw new StringIndexOutOfBoundsException(result);
+			}
+			// Step backward one code point per iteration.
+			for (count = -shift32; count > 0; count--) {
+				result--;
+				// Ran off the front of the subarray: leave count non-zero so the
+				// check after the loop throws.
+				if (result < start) {
+					break;
+				}
+				ch = source[result];
+				// A trail surrogate preceded by a lead surrogate inside the subarray
+				// counts as one code point, so step over the lead as well.
+				if (isTrailSurrogate(ch) && result > start && isLeadSurrogate(source[result - 1])) {
+					result--;
+				}
+			}
+		}
+		// A non-zero count means the subarray ended before shift32 code points
+		// could be traversed.
+		if (count != 0) {
+			throw new StringIndexOutOfBoundsException(shift32);
+		}
+		// Convert the absolute index back to an offset relative to start.
+		result -= start;
+		return result;
+	}
+
+	// private data members -------------------------------------------------
+
+	/**
+	 * Shift value for lead surrogate to form a supplementary character:
+	 * getLeadSurrogate shifts the code point right by this many bits.
+	 */
+	private static final int LEAD_SURROGATE_SHIFT_ = 10;
+
+	/**
+	 * Mask to retrieve the significant value from a trail surrogate (the low 10
+	 * bits); getTrailSurrogate applies it to the code point.
+	 */
+	private static final int TRAIL_SURROGATE_MASK_ = 0x3FF;
+
+	/**
+	 * Offset added to the shifted code point in getLeadSurrogate. It evaluates to
+	 * 0xD800 - (0x10000 >> 10) = 0xD7C0, so SUPPLEMENTARY_MIN_VALUE maps to
+	 * LEAD_SURROGATE_MIN_VALUE.
+	 */
+	private static final int LEAD_SURROGATE_OFFSET_ = LEAD_SURROGATE_MIN_VALUE
+			- (SUPPLEMENTARY_MIN_VALUE >> LEAD_SURROGATE_SHIFT_);
+
+	// private methods ------------------------------------------------------
+
+	/**
+	 * <p>
+	 * Converts argument code point and returns a String object representing the
+	 * code point's value in UTF16 format.
+	 * <p>
+	 * This method does not check for the validity of the codepoint, the results are
+	 * not guaranteed if an invalid codepoint is passed as argument.
+	 * <p>
+	 * The result is a string whose length is 1 for non-supplementary code points, 2
+	 * otherwise.
+	 * 
+	 * @param ch code point
+	 * @return string representation of the code point
+	 */
+	private static String toString(int ch) {
+		if (ch < SUPPLEMENTARY_MIN_VALUE) {
+			return String.valueOf((char) ch);
+		}
+
+		StringBuilder result = new StringBuilder();
+		result.append(getLeadSurrogate(ch));
+		result.append(getTrailSurrogate(ch));
+		return result.toString();
+	}
+}
--- a/sources/main/java/jdk_internal/icu/text/UnicodeSet.java
+++ b/sources/main/java/jdk_internal/icu/text/UnicodeSet.java