From 4e8778e52b6a91f116f9417c384e02d983e4e58b Mon Sep 17 00:00:00 2001
From: mengjingzhimo The line, offset, and context fields are optional; parsing
+ * engines may choose not to use them.
+ *
+ * The preContext and postContext strings include some part of the
+ * context surrounding the error. If the source text is "let for=7"
+ * and "for" is the error (e.g., because it is a reserved word), then
+ * some examples of what a parser might produce are the following:
+ *
+ * Examples of engines which use UParseError (or may use it in the
+ * future) are Transliterator, RuleBasedBreakIterator, and
+ * RegexPattern.
+ *
+ * @stable ICU 2.0
+ */
+typedef struct UParseError {
+
+ /**
+ * The line on which the error occurred. If the parser uses this
+ * field, it sets it to the line number of the source text line on
+ * which the error appears, which will be a value >= 1. If the
+ * parser does not support line numbers, the value will be <= 0.
+ * @stable ICU 2.0
+ */
+ int32_t line;
+
+ /**
+ * The character offset to the error. If the line field is >= 1,
+ * then this is the offset from the start of the line. Otherwise,
+ * this is the offset from the start of the text. If the parser
+ * does not support this field, it will have a value < 0.
+ * @stable ICU 2.0
+ */
+ int32_t offset;
+
+ /**
+ * Textual context before the error. Null-terminated. The empty
+ * string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar preContext[U_PARSE_CONTEXT_LEN];
+
+ /**
+ * The error itself and/or textual context after the error.
+ * Null-terminated. The empty string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar postContext[U_PARSE_CONTEXT_LEN];
+
+} UParseError;
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/stringoptions.h b/third_party/icu4c/ndk_headers/unicode/stringoptions.h
new file mode 100644
index 00000000000..09444284cad
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/stringoptions.h
@@ -0,0 +1,190 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// stringoptions.h
+// created: 2017jun08 Markus W. Scherer
+
+#ifndef __STRINGOPTIONS_H__
+#define __STRINGOPTIONS_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Bit set option bit constants for various string and character processing functions.
+ */
+
+/**
+ * Option value for case folding: Use default mappings defined in CaseFolding.txt.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * Titlecase the string as a whole rather than each word.
+ * (Titlecase only the character at index 0, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @stable ICU 60
+ */
+#define U_TITLECASE_WHOLE_STRING 0x20
+
+/**
+ * Titlecase sentences rather than words.
+ * (Titlecase only the first character of each sentence, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @stable ICU 60
+ */
+#define U_TITLECASE_SENTENCES 0x40
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the character at each
+ * (possibly adjusted) BreakIterator index and
+ * lowercase all other characters up to the next iterator index.
+ * With this option, the other characters will not be modified.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing BreakIterator indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
+ * and titlecase that one.
+ *
+ * Other characters are lowercased.
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+/**
+ * Adjust each titlecasing BreakIterator index to the next cased character.
+ * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * This used to be the default index adjustment in ICU.
+ * Since ICU 60, the default index adjustment is to the next character that is
+ * a letter, number, symbol, or private use code point.
+ * (Uncased modifier letters are skipped.)
+ * The difference in behavior is small for word titlecasing,
+ * but the new adjustment is much better for whole-string and sentence titlecasing:
+ * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 60
+ */
+#define U_TITLECASE_ADJUST_TO_CASED 0x400
+
+/**
+ * Option for string transformation functions to not first reset the Edits object.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @stable ICU 60
+ */
+#define U_EDITS_NO_RESET 0x2000
+
+/**
+ * Omit unchanged text when recording how source substrings
+ * relate to changed and unchanged result substrings.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @stable ICU 60
+ */
+#define U_OMIT_UNCHANGED_TEXT 0x4000
+
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD 0x20000
+
+// Related definitions elsewhere.
+// Options that are not meaningful in the same functions
+// can share the same bits.
+//
+// Public:
+// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+//
+// Internal: (may change or be removed)
+// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
+// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
+// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
+// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
+// ustr_imp.h #define _STRNCMP_STYLE 0x1000
+// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
+
+#endif // __STRINGOPTIONS_H__
diff --git a/third_party/icu4c/ndk_headers/unicode/ubidi.h b/third_party/icu4c/ndk_headers/unicode/ubidi.h
new file mode 100644
index 00000000000..c9dfcd540a1
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/ubidi.h
@@ -0,0 +1,2191 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidi.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999jul27
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*/
+
+#ifndef UBIDI_H
+#define UBIDI_H
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ *\file
+ * \brief C API: Bidi algorithm
+ *
+ *
+ *
+ * Note: Libraries that perform a bidirectional algorithm and
+ * reorder strings accordingly are sometimes called "Storage Layout Engines".
+ * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
+ * "Storage Layout Engines".
+ *
+ *
+ *
+ * The "limit" of a sequence of characters is the position just after their
+ * last character, i.e., one more than that position.
+ *
+ * Some of the API functions provide access to "runs".
+ * Such a "run" is defined as a sequence of characters
+ * that are at the same embedding level
+ * after performing the Bidi algorithm.
+ *
+ * @author Markus W. Scherer
+ * @version 1.0
+ *
+ *
+ * The basic assumptions are:
+ *
+ * It can also hold non-level values for the
+ * The related constants are not real, valid level values.
+ *
+ *
+ * Note that the value for
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 0 (left-to-right).
+ *
+ * If this value is used in conjunction with reordering modes
+ *
+ *
+ * If reordering option
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 1 (right-to-left).
+ *
+ * If this value is used in conjunction with reordering modes
+ *
+ *
+ * If reordering option As return value for As return value for
+ * This structure holds information about a paragraph (or multiple paragraphs)
+ * of text with Bidi-algorithm-related details, or about one line of
+ * such a paragraph.
+ * Reordering can be done on a line, or on one or more paragraphs which are
+ * then interpreted each as one single line.
+ * @stable ICU 2.0
+ */
+struct UBiDi;
+
+/** Typedef for the forward-declared UBiDi structure used by the API functions. @stable ICU 2.0 */
+typedef struct UBiDi UBiDi;
+
+/**
+ * Allocate a
+ * This object can be reused for as long as it is not deallocated
+ * by calling
+ *
+ * Subsequent functions will not allocate any more memory, and are thus
+ * guaranteed not to fail because of lack of memory.
+ * The preallocation can be limited to some of the internal memory
+ * by setting some values to 0 here. That means that if, e.g.,
+ *
+ *
+ * Important:
+ * A parent The normal operation of the Bidi algorithm as described
+ * in the Unicode Technical Report is to take text stored in logical
+ * (keyboard, typing) order and to determine the reordering of it for visual
+ * rendering.
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * to logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi" and that the current implementation provides only an
+ * approximation of "inverse Bidi". With Output runs should be retrieved using Calling this function with argument Note: calling this function after setting the reordering mode with
+ * The normal operation of the Bidi algorithm as described
+ * in the Unicode Standard Annex #9 is to take text stored in logical
+ * (keyboard, typing) order and to determine how to reorder it for visual
+ * rendering. With the reordering mode set to a value other than
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * into logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi", so a number of variants are implemented here. In other cases, it may be desirable to emulate some variant of the
+ * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
+ * Logical to Logical transformation. In all the reordering modes specifying an "inverse Bidi" algorithm
+ * (i.e. those with a name starting with Note that option This option must be set or reset before calling
+ * This option is significant only with reordering modes which generate
+ * a result with Logical order, specifically: If this option is set in conjunction with reordering mode
+ * For other reordering modes, a minimum number of LRM or RLM characters
+ * will be added to the source text after reordering it so as to ensure
+ * round trip, i.e. when applying the inverse reordering mode on the
+ * resulting logical text with removal of Bidi marks
+ * (option This option will be ignored if specified together with option
+ * This option must be set or reset before calling
+ * This option nullifies option This option must be set or reset before calling
+ * This option specifies that the caller is interested in processing large
+ * text object in parts.
+ * The results of the successive calls are expected to be concatenated by the
+ * caller. Only the call for the last part will have this option bit off. When this option bit is on,
+ * preContext postContext
+ * "" "" The parser does not support context
+ * "let " "=7" Pre- and post-context only
+ * "let " "for=7" Pre- and post-context and error text
+ * "" "for" Error text only
+ *
+ *
+ * Bidi algorithm for ICU
+ *
+ * This is an implementation of the Unicode Bidirectional Algorithm.
+ * The algorithm is defined in the
+ * Unicode Standard Annex #9.
+ *
+ * General remarks about the API:
+ *
+ * In functions with an error code parameter,
+ * the pErrorCode pointer must be valid
+ * and the value that it points to must not indicate a failure before
+ * the function call. Otherwise, the function returns immediately.
+ * After the function call, the value indicates success or failure.
+ *
+ * Sample code for the ICU Bidi API
+ *
+ * Rendering a paragraph with the ICU Bidi API
+ *
+ * This is (hypothetical) sample code that illustrates
+ * how the ICU Bidi API could be used to render a paragraph of text.
+ * Rendering code depends highly on the graphics system,
+ * therefore this sample code must make a lot of assumptions,
+ * which may or may not match any existing graphics system's properties.
+ *
+ *
+ *
+ *
+ *
+ * \code
+ *#include
+ */
+
+/*DOCXX_TAG*/
+/*@{*/
+
+/**
+ * UBiDiLevel is the type of the level values in this
+ * Bidi implementation.
+ * It holds an embedding level and indicates the visual direction
+ * by its bit 0 (even/odd value).
+ *
+ * It can also hold non-level values for the
+ * paraLevel and embeddingLevels
+ * arguments of ubidi_setPara(); there:
+ *
+ * - bit 7 of an embeddingLevels[]
+ *   value indicates whether the using application is
+ *   specifying the level of a character to override whatever the
+ *   Bidi implementation would resolve it to.
+ * - paraLevel can be set to the
+ *   pseudo-level values UBIDI_DEFAULT_LTR
+ *   and UBIDI_DEFAULT_RTL.
+ *
+ * @see ubidi_setPara
+ *
+ * The related constants are not real, valid level values.
+ * UBIDI_DEFAULT_XXX can be used to specify
+ * a default for the paragraph level for
+ * when the ubidi_setPara() function
+ * shall determine it but there is no
+ * strongly typed character in the input.
+ *
+ * Note that the value for UBIDI_DEFAULT_LTR is even
+ * and the one for UBIDI_DEFAULT_RTL is odd,
+ * just like with normal LTR and RTL level values -
+ * these special values are designed that way. Also, the implementation
+ * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
+ *
+ * Note: The numeric values of the related constants will not change:
+ * They are tied to the use of 7-bit byte values (plus the override bit)
+ * and of the UBiDiLevel=uint8_t data type in this API.
+ *
+ * @see UBIDI_DEFAULT_LTR
+ * @see UBIDI_DEFAULT_RTL
+ * @see UBIDI_LEVEL_OVERRIDE
+ * @see UBIDI_MAX_EXPLICIT_LEVEL
+ * @stable ICU 2.0
+ */
+typedef uint8_t UBiDiLevel;
+
+/** Paragraph level setting.
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 0 (left-to-right).
+ *
+ * If this value is used in conjunction with reordering modes
+ * UBIDI_REORDER_INVERSE_LIKE_DIRECT or
+ * UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter; the direction will be LTR otherwise.
+ *
+ * If reordering option UBIDI_OPTION_INSERT_MARKS is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_LTR 0xfe
+
+/** Paragraph level setting.
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 1 (right-to-left).
+ *
+ * If this value is used in conjunction with reordering modes
+ * UBIDI_REORDER_INVERSE_LIKE_DIRECT or
+ * UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the rightmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, or if the text contains no strong character;
+ * the direction will be LTR otherwise.
+ *
+ * If reordering option UBIDI_OPTION_INSERT_MARKS is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_RTL 0xff
+
+/**
+ * Maximum explicit embedding level.
+ * Same as the max_depth value in the
+ * Unicode Bidirectional Algorithm.
+ * (The maximum resolved level can be up to UBIDI_MAX_EXPLICIT_LEVEL+1).
+ * @stable ICU 2.0
+ */
+#define UBIDI_MAX_EXPLICIT_LEVEL 125
+
+/** Bit flag for level input.
+ * Overrides directional properties.
+ * The value 0x80 is bit 7 of an 8-bit UBiDiLevel value.
+ * @stable ICU 2.0
+ */
+#define UBIDI_LEVEL_OVERRIDE 0x80
+
+/**
+ * Special value which can be returned by the mapping functions when a logical
+ * index has no corresponding visual index or vice-versa. This may happen
+ * for the logical-to-visual mapping of a Bidi control when option
+ * #UBIDI_OPTION_REMOVE_CONTROLS is specified. This can also happen
+ * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
+ * by option #UBIDI_OPTION_INSERT_MARKS.
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getVisualMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getLogicalMap
+ * @stable ICU 3.6
+ */
+#define UBIDI_MAP_NOWHERE (-1)
+
+/**
+ * UBiDiDirection values indicate the text direction.
+ * @stable ICU 2.0
+ */
+enum UBiDiDirection {
+ /** Left-to-right text. This is a 0 value.
+ *
+ * - As return value for ubidi_getDirection(), it means
+ *   that the source string contains no right-to-left characters, or
+ *   that the source string is empty and the paragraph level is even.
+ * - As return value for ubidi_getBaseDirection(), it
+ *   means that the first strong character of the source string has
+ *   a left-to-right direction.
+ *
+ * @stable ICU 2.0
+ */
+ UBIDI_LTR,
+ /** Right-to-left text. This is a 1 value.
+ *
+ * - As return value for ubidi_getDirection(), it means
+ *   that the source string contains no left-to-right characters, or
+ *   that the source string is empty and the paragraph level is odd.
+ * - As return value for ubidi_getBaseDirection(), it
+ *   means that the first strong character of the source string has
+ *   a right-to-left direction.
+ *
+ * @stable ICU 2.0
+ */
+ UBIDI_RTL,
+ /** Mixed-directional text.
+ * As return value for ubidi_getDirection(), it means
+ * that the source string contains both left-to-right and
+ * right-to-left characters.
+ * @stable ICU 2.0
+ */
+ UBIDI_MIXED,
+ /** No strongly directional text.
+ * As return value for ubidi_getBaseDirection(), it means
+ * that the source string is missing or empty, or contains neither left-to-right
+ * nor right-to-left characters.
+ * @stable ICU 4.6
+ */
+ UBIDI_NEUTRAL
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBiDiDirection UBiDiDirection;
+
+/**
+ * Allocate a UBiDi structure.
+ * (The UBiDi structure is only forward-declared for the declaration of
+ * the API functions. Its fields are implementation-specific.)
+ * Such an object is initially empty. It is assigned
+ * the Bidi properties of a piece of text containing one or more paragraphs
+ * by ubidi_setPara()
+ * or the Bidi properties of a line within a paragraph by
+ * ubidi_setLine().
+ *
+ * This object can be reused for as long as it is not deallocated
+ * by calling ubidi_close().
+ *
+ * ubidi_setPara() and ubidi_setLine() will allocate
+ * additional memory for internal structures as necessary.
+ *
+ * @return An empty UBiDi object.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_open(void);
+
+/**
+ * Allocate a UBiDi structure with preallocated memory
+ * for internal structures.
+ * This function provides a UBiDi object like ubidi_open()
+ * with no arguments, but it also preallocates memory for internal structures
+ * according to the sizings supplied by the caller.
+ *
+ * Subsequent functions will not allocate any more memory, and are thus
+ * guaranteed not to fail because of lack of memory.
+ *
+ * The preallocation can be limited to some of the internal memory
+ * by setting some values to 0 here. That means that if, e.g.,
+ * maxRunCount cannot be reasonably predetermined and should not
+ * be set to maxLength (the only failproof value) to avoid
+ * wasting memory, then maxRunCount could be set to 0 here
+ * and the internal structures that are associated with it will be allocated
+ * on demand, just like with ubidi_open().
+ *
+ * @param maxLength is the maximum text or line length that internal memory
+ * will be preallocated for. An attempt to associate this object with a
+ * longer text will fail, unless this value is 0, which leaves the allocation
+ * up to the implementation.
+ *
+ * @param maxRunCount is the maximum anticipated number of same-level runs
+ * that internal memory will be preallocated for. An attempt to access
+ * visual runs on an object that was not preallocated for as many runs
+ * as the text was actually resolved to will fail,
+ * unless this value is 0, which leaves the allocation up to the implementation.
+ * The number of runs depends on the actual text and may be anywhere between
+ * 1 and maxLength. It is typically small.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return An empty UBiDi object with preallocated memory.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
+
+/**
+ * ubidi_close() must be called to free the memory
+ * associated with a UBiDi object.
+ *
+ * Important:
+ * A parent UBiDi object must not be destroyed or reused if
+ * it still has children.
+ * If a UBiDi object has become the child
+ * of another one (its parent) by calling
+ * ubidi_setLine(), then the child object must
+ * be destroyed (closed) or reused (by calling
+ * ubidi_setPara() or ubidi_setLine())
+ * before the parent object.
+ *
+ * @param pBiDi is a UBiDi object.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_close(UBiDi *pBiDi);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBiDiPointer
+ * "Smart pointer" class, closes a UBiDi via ubidi_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
+
+U_NAMESPACE_END
+
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * Modify the operation of the Bidi algorithm such that it
+ * approximates an "inverse Bidi" algorithm. This function
+ * must be called before ubidi_setPara().
+ *
+ * The normal operation of the Bidi algorithm as described
+ * in the Unicode Technical Report is to take text stored in logical
+ * (keyboard, typing) order and to determine the reordering of it for visual
+ * rendering.
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * to logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi" and that the current implementation provides only an
+ * approximation of "inverse Bidi".
+ *
+ * With isInverse set to true,
+ * this function changes the behavior of some of the subsequent functions
+ * in a way that they can be used for the inverse Bidi algorithm.
+ * Specifically, runs of text with numeric characters will be treated in a
+ * special way and may need to be surrounded with LRM characters when they are
+ * written in reordered sequence.
+ *
+ * Output runs should be retrieved using ubidi_getVisualRun().
+ * Since the actual input for "inverse Bidi" is visually ordered text and
+ * ubidi_getVisualRun() gets the reordered runs, these are actually
+ * the runs of the logically ordered output.
+ *
+ * Calling this function with argument isInverse set to
+ * true is equivalent to calling
+ * ubidi_setReorderingMode with argument
+ * reorderingMode
+ * set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
+ * Calling this function with argument isInverse set to
+ * false is equivalent to calling
+ * ubidi_setReorderingMode with argument
+ * reorderingMode
+ * set to #UBIDI_REORDER_DEFAULT.
+ *
+ * @param pBiDi is a UBiDi object.
+ *
+ * @param isInverse specifies "forward" or "inverse" Bidi operation.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
+
+/**
+ * Is this Bidi object set to perform the inverse Bidi algorithm?
+ *
+ * Note: calling this function after setting the reordering mode with
+ * ubidi_setReorderingMode will return true if the
+ * reordering mode was set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
+ * false for all other values.
+ *
+ * @param pBiDi is a UBiDi object.
+ * @return true if the Bidi object is set to perform the inverse Bidi algorithm
+ * by handling numbers as L.
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+
+U_CAPI UBool U_EXPORT2
+ubidi_isInverse(UBiDi *pBiDi);
+
+/**
+ * Specify whether block separators must be allocated level zero,
+ * so that successive paragraphs will progress from left to right.
+ * This function must be called before ubidi_setPara().
+ * Paragraph separators (B) may appear in the text. Setting them to level zero
+ * means that all paragraph separators (including one possibly appearing
+ * in the last text position) are kept in the reordered text after the text
+ * that they follow in the source text.
+ * When this feature is not enabled, a paragraph separator at the last
+ * position of the text before reordering will go to the first position
+ * of the reordered text when the paragraph level is odd.
+ *
+ * @param pBiDi is a UBiDi object.
+ *
+ * @param orderParagraphsLTR specifies whether paragraph separators (B) must
+ * receive level 0, so that successive paragraphs progress from left to right.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
+
+/**
+ * Is this Bidi object set to allocate level 0 to block separators so that
+ * successive paragraphs progress from left to right?
+ *
+ * @param pBiDi is a UBiDi object.
+ * @return true if the Bidi object is set to allocate level 0 to block
+ * separators.
+ *
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
+
+/**
+ * UBiDiReorderingMode values indicate which variant of the Bidi
+ * algorithm to use.
+ *
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingMode {
+ /** Regular Logical to Visual Bidi algorithm according to Unicode.
+ * This is a 0 value.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_DEFAULT = 0,
+ /** Logical to Visual algorithm which handles numbers in a way which
+ * mimics the behavior of Windows XP.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_NUMBERS_SPECIAL,
+ /** Logical to Visual algorithm grouping numbers with adjacent R characters
+ * (reversible algorithm).
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
+ /** Reorder runs only to transform a Logical LTR string to the Logical RTL
+ * string with the same display, or vice-versa.
+ * If this mode is set together with option
+ * #UBIDI_OPTION_INSERT_MARKS, some Bidi controls in the source
+ * text may be removed and other controls may be added to produce the
+ * minimum combination which has the required display.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_RUNS_ONLY,
+ /** Visual to Logical algorithm which handles numbers like L
+ * (same algorithm as selected by ubidi_setInverse(true)).
+ * @see ubidi_setInverse
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
+ /** Visual to Logical algorithm equivalent to the regular Logical to Visual
+ * algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_LIKE_DIRECT,
+ /** Inverse Bidi (Visual to Logical) algorithm for the
+ * UBIDI_REORDER_NUMBERS_SPECIAL Bidi algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
+} UBiDiReorderingMode;
+
+/**
+ * Modify the operation of the Bidi algorithm such that it implements some
+ * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
+ * algorithm, depending on different values of the "reordering mode".
+ * This function must be called before ubidi_setPara(), and stays
+ * in effect until called again with a different argument.
+ *
+ * The normal operation of the Bidi algorithm as described
+ * in the Unicode Standard Annex #9 is to take text stored in logical
+ * (keyboard, typing) order and to determine how to reorder it for visual
+ * rendering.
+ *
+ * With the reordering mode set to a value other than
+ * #UBIDI_REORDER_DEFAULT, this function changes the behavior of
+ * some of the subsequent functions in a way such that they implement an
+ * inverse Bidi algorithm or some other algorithm variants.
+ *
+ * Some legacy systems store text in visual order, and for operations
+ * with standard software, this text needs to be converted to
+ * logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi", so a number of variants are implemented here.
+ *
+ * In other cases, it may be desirable to emulate some variant of the
+ * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
+ * Logical to Logical transformation.
+ *
+ * - When the reordering mode is set to #UBIDI_REORDER_DEFAULT,
+ *   the standard Bidi Logical to Visual algorithm is applied.
+ *
+ * - When the reordering mode is set to #UBIDI_REORDER_NUMBERS_SPECIAL,
+ *   the algorithm used to perform Bidi transformations when calling
+ *   ubidi_setPara should approximate the algorithm used in
+ *   Microsoft Windows XP rather than strictly conform to the Unicode Bidi
+ *   algorithm.
+ *
+ *   The differences between the basic algorithm and the algorithm addressed
+ *   by this option are as follows:
+ *   - Within text at an even embedding level, the sequence "123AB"
+ *     (where AB represent R or AL letters) is transformed to "123BA" by the
+ *     Unicode algorithm and to "BA123" by the Windows algorithm.
+ *   - Arabic-Indic numbers (AN) are handled by the Windows algorithm just
+ *     like regular numbers (EN).
+ *
+ * - When the reordering mode is set to #UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
+ *   numbers located between LTR text and RTL text are associated with the RTL
+ *   text. For instance, an LTR paragraph with content "abc 123 DEF" (where
+ *   upper case letters represent RTL characters) will be transformed to
+ *   "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
+ *   to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
+ *   This makes the algorithm reversible and makes it useful when round trip
+ *   (from visual to logical and back to visual) must be achieved without
+ *   adding LRM characters. However, this is a variation from the standard
+ *   Unicode Bidi algorithm.
+ *   The source text should not contain Bidi control characters other than LRM
+ *   or RLM.
+ *
+ * - When the reordering mode is set to #UBIDI_REORDER_RUNS_ONLY,
+ *   a "Logical to Logical" transformation must be performed:
+ *   - If the default text level of the source text (argument paraLevel
+ *     in ubidi_setPara) is even, the source text will be handled as
+ *     LTR logical text and will be transformed to the RTL logical text which has
+ *     the same LTR visual display.
+ *   - If the default level of the source text is odd, the source text
+ *     will be handled as RTL logical text and will be transformed to the
+ *     LTR logical text which has the same LTR visual display.
+ *
+ *   This mode may be needed when logical text which is basically Arabic or
+ *   Hebrew, with possible included numbers or phrases in English, has to be
+ *   displayed as if it had an even embedding level (this can happen if the
+ *   displaying application treats all text as if it was basically LTR).
+ *
+ *   This mode may also be needed in the reverse case, when logical text which is
+ *   basically English, with possible included phrases in Arabic or Hebrew, has to
+ *   be displayed as if it had an odd embedding level.
+ *
+ *   Both cases could be handled by adding LRE or RLE at the head of the text,
+ *   if the display subsystem supports these formatting controls. If it does not,
+ *   the problem may be handled by transforming the source text in this mode
+ *   before displaying it, so that it will be displayed properly.
+ *   The source text should not contain Bidi control characters other than LRM
+ *   or RLM.
+ *
+ * - When the reordering mode is set to
+ *   #UBIDI_REORDER_INVERSE_NUMBERS_AS_L, an "inverse Bidi" algorithm
+ *   is applied.
+ *   Runs of text with numeric characters will be treated like LTR letters and
+ *   may need to be surrounded with LRM characters when they are written in
+ *   reordered sequence (the option #UBIDI_INSERT_LRM_FOR_NUMERIC can
+ *   be used with function ubidi_writeReordered to this end). This
+ *   mode is equivalent to calling ubidi_setInverse() with
+ *   argument isInverse set to true.
+ *
+ * - When the reordering mode is set to
+ *   #UBIDI_REORDER_INVERSE_LIKE_DIRECT, the "direct" Logical to Visual
+ *   Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
+ *   This mode is similar to mode #UBIDI_REORDER_INVERSE_NUMBERS_AS_L
+ *   but is closer to the regular Bidi algorithm.
+ *
+ *   For example, an LTR paragraph with the content "FED 123 456 CBA" (where
+ *   upper case represents RTL characters) will be transformed to
+ *   "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
+ *   with mode UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
+ *   When used in conjunction with option
+ *   #UBIDI_OPTION_INSERT_MARKS, this mode generally
+ *   adds Bidi marks to the output significantly more sparingly than mode
+ *   #UBIDI_REORDER_INVERSE_NUMBERS_AS_L with option
+ *   #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to
+ *   ubidi_writeReordered.
+ *
+ * - When the reordering mode is set to
+ *   #UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the Logical to Visual
+ *   Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
+ *
+ *   For example, an LTR paragraph with the content "abc FED123" (where
+ *   upper case represents RTL characters) will be transformed to "abc 123DEF."
+ *
+ * In all the reordering modes specifying an "inverse Bidi" algorithm
+ * (i.e. those with a name starting with UBIDI_REORDER_INVERSE),
+ * output runs should be retrieved using
+ * ubidi_getVisualRun(), and the output text with
+ * ubidi_writeReordered(). The caller should keep in mind that in
+ * "inverse Bidi" modes the input is actually visually ordered text and
+ * reordered output returned by ubidi_getVisualRun() or
+ * ubidi_writeReordered() are actually runs or character string
+ * of logically ordered output.
+ * For all the "inverse Bidi" modes, the source text should not contain
+ * Bidi control characters other than LRM or RLM.
+ *
+ * Note that option #UBIDI_OUTPUT_REVERSE of
+ * ubidi_writeReordered has no useful meaning and should not be
+ * used in conjunction with any value of the reordering mode specifying
+ * "inverse Bidi" or with value UBIDI_REORDER_RUNS_ONLY.
+ *
+ * @param pBiDi is a UBiDi object.
+ * @param reorderingMode specifies the required variant of the Bidi algorithm.
+ *
+ * @see UBiDiReorderingMode
+ * @see ubidi_setInverse
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
+
+/**
+ * What is the requested reordering mode for a given Bidi object?
+ *
+ * @param pBiDi is a UBiDi object.
+ * @return the current reordering mode of the Bidi object
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+U_CAPI UBiDiReorderingMode U_EXPORT2
+ubidi_getReorderingMode(UBiDi *pBiDi);
+
+/**
+ * UBiDiReorderingOption values indicate which options are
+ * specified to affect the Bidi algorithm.
+ *
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingOption {
+ /**
+ * option value for ubidi_setReorderingOptions:
+ * disable all the options which can be set with this function
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_DEFAULT = 0,
+
+ /**
+ * option bit for ubidi_setReorderingOptions:
+ * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
+ * a reordering to a Logical order
+ *
+ * This option must be set or reset before calling
+ * ubidi_setPara.
+ *
+ * This option is significant only with reordering modes which generate
+ * a result with Logical order, specifically:
+ * - #UBIDI_REORDER_RUNS_ONLY
+ * - #UBIDI_REORDER_INVERSE_NUMBERS_AS_L
+ * - #UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * - #UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ *
+ * If this option is set in conjunction with reordering mode
+ * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L or with calling
+ * ubidi_setInverse(true), it implies
+ * option #UBIDI_INSERT_LRM_FOR_NUMERIC
+ * in calls to function ubidi_writeReordered().
+ *
+ * For other reordering modes, a minimum number of LRM or RLM characters
+ * will be added to the source text after reordering it so as to ensure
+ * round trip, i.e. when applying the inverse reordering mode on the
+ * resulting logical text with removal of Bidi marks
+ * (option #UBIDI_OPTION_REMOVE_CONTROLS set before calling
+ * ubidi_setPara() or option #UBIDI_REMOVE_BIDI_CONTROLS
+ * in ubidi_writeReordered), the result will be identical to the
+ * source text in the first transformation.
+ *
+ * This option will be ignored if specified together with option
+ * #UBIDI_OPTION_REMOVE_CONTROLS. It inhibits option
+ * UBIDI_REMOVE_BIDI_CONTROLS in calls to function
+ * ubidi_writeReordered() and it implies option
+ * #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to function
+ * ubidi_writeReordered() if the reordering mode is
+ * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_INSERT_MARKS = 1,
+
+ /**
+ * option bit for ubidi_setReorderingOptions:
+ * remove Bidi control characters
+ *
+ * This option must be set or reset before calling
+ * ubidi_setPara.
+ *
+ * This option nullifies option #UBIDI_OPTION_INSERT_MARKS.
+ * It inhibits option #UBIDI_INSERT_LRM_FOR_NUMERIC in calls
+ * to function ubidi_writeReordered() and it implies option
+ * #UBIDI_REMOVE_BIDI_CONTROLS in calls to that function.
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_REMOVE_CONTROLS = 2,
+
+ /**
+ * option bit for ubidi_setReorderingOptions:
+ * process the output as part of a stream to be continued
+ *
+ * This option must be set or reset before calling
+ * ubidi_setPara.
+ *
+ * This option specifies that the caller is interested in processing large
+ * text object in parts.
+ * The results of the successive calls are expected to be concatenated by the
+ * caller. Only the call for the last part will have this option bit off.
+ *
+ * When this option bit is on, ubidi_setPara() may process
+ * less than the full source text in order to truncate the text at a meaningful
+ * boundary. The caller should call ubidi_getProcessedLength()
+ * immediately after calling ubidi_setPara() in order to
+ * determine how much of the source text has been processed.
+ * Source text beyond that length should be resubmitted in following calls to
+ * ubidi_setPara. The processed length may be less than
+ * the length of the source text if a character preceding the last character of
+ * the source text constitutes a reasonable boundary (like a block separator)
+ * for text to be continued.
+ * If the last character of the source text constitutes a reasonable
+ * boundary, the whole text will be processed at once.
+ * If nowhere in the source text there exists
+ * such a reasonable boundary, the processed length will be zero.
+ * The caller should check for such an occurrence and do one of the following:
+ * - submit a larger amount of text with a better chance to include
+ *   a reasonable boundary.
+ * - resubmit the same text after turning off option
+ *   UBIDI_OPTION_STREAMING.
+ *
+ * In all cases, this option should be turned off before processing the last
+ * part of the text.
+ *
+ * When the UBIDI_OPTION_STREAMING option is used,
+ * it is recommended to call ubidi_orderParagraphsLTR() with
+ * argument orderParagraphsLTR set to true before
+ * calling ubidi_setPara so that later paragraphs may be
+ * concatenated to previous paragraphs on the right.
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @see ubidi_getProcessedLength
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_STREAMING = 4
+} UBiDiReorderingOption;
+
+/**
+ * Specify which of the reordering options
+ * should be applied during Bidi transformations.
+ *
+ * @param pBiDi is a UBiDi object.
+ * @param reorderingOptions is a combination of zero or more of the following
+ * options:
+ * #UBIDI_OPTION_DEFAULT, #UBIDI_OPTION_INSERT_MARKS,
+ * #UBIDI_OPTION_REMOVE_CONTROLS, #UBIDI_OPTION_STREAMING.
+ *
+ * @see ubidi_getReorderingOptions
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
+
+/**
+ * What are the reordering options applied to a given Bidi object?
+ *
+ * @param pBiDi is a UBiDi object.
+ * @return the current reordering options of the Bidi object
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+U_CAPI uint32_t U_EXPORT2
+ubidi_getReorderingOptions(UBiDi *pBiDi);
+
+/**
+ * Set the context before a call to ubidi_setPara().
+ *
+ * ubidi_setPara() computes the left-right directionality for a given piece
+ * of text which is supplied as one of its arguments. Sometimes this piece
+ * of text (the "main text") should be considered in context, because text
+ * appearing before ("prologue") and/or after ("epilogue") the main text
+ * may affect the result of this computation.
+ *
+ * This function specifies the prologue and/or the epilogue for the next
+ * call to ubidi_setPara(). The characters specified as prologue and
+ * epilogue should not be modified by the calling program until the call
+ * to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
+ * all need specification of a context, ubidi_setContext() must be called
+ * before each call to ubidi_setPara(). In other words, a context is not
+ * "remembered" after the following successful call to ubidi_setPara().
+ *
+ * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
+ * ubidi_setContext() which specifies a prologue, the paragraph level will
+ * be computed taking in consideration the text in the prologue.
+ *
+ * When ubidi_setPara() is called without a previous call to
+ * ubidi_setContext, the main text is handled as if preceded and followed
+ * by strong directional characters at the current paragraph level.
+ * Calling ubidi_setContext() with specification of a prologue will change
+ * this behavior by handling the main text as if preceded by the last
+ * strong character appearing in the prologue, if any.
+ * Calling ubidi_setContext() with specification of an epilogue will change
+ * the behavior of ubidi_setPara() by handling the main text as if followed
+ * by the first strong character or digit appearing in the epilogue, if any.
+ *
+ * Note 1: if ubidi_setContext is called repeatedly without
+ * calling ubidi_setPara, the earlier calls have no effect,
+ * only the last call will be remembered for the next call to
+ * ubidi_setPara.
+ *
+ * Note 2: calling ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)
+ * cancels any previous setting of non-empty prologue or epilogue.
+ * The next call to ubidi_setPara() will process no
+ * prologue or epilogue.
+ *
+ * Note 3: users must be aware that even after setting the context
+ * before a call to ubidi_setPara() to perform e.g. a logical to visual
+ * transformation, the resulting string may not be identical to what it
+ * would have been if all the text, including prologue and epilogue, had
+ * been processed together.
+ * Example (upper case letters represent RTL characters):
+ *   prologue = "abc DE"
+ *   epilogue = none
+ *   main text = "FGH xyz"
+ *   paraLevel = UBIDI_LTR
+ *   display without prologue = "HGF xyz"
+ *             ("HGF" is adjacent to "xyz")
+ *   display with prologue = "abc HGFED xyz"
+ *             ("HGF" is not adjacent to "xyz")
+ *
+ * @param pBiDi is a paragraph UBiDi object.
+ *
+ * @param prologue is a pointer to the text which precedes the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no prologue to consider, then proLength
+ * must be zero and this pointer can be NULL.
+ *
+ * @param proLength is the length of the prologue; if proLength==-1
+ * then the prologue must be zero-terminated.
+ * Otherwise proLength must be >= 0. If proLength==0, it means
+ * that there is no prologue to consider.
+ *
+ * @param epilogue is a pointer to the text which follows the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no epilogue to consider, then epiLength
+ * must be zero and this pointer can be NULL.
+ *
+ * @param epiLength is the length of the epilogue; if epiLength==-1
+ * then the epilogue must be zero-terminated.
+ * Otherwise epiLength must be >= 0. If epiLength==0, it means
+ * that there is no epilogue to consider.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 4.8
+ */
+U_CAPI void U_EXPORT2
+ubidi_setContext(UBiDi *pBiDi,
+ const UChar *prologue, int32_t proLength,
+ const UChar *epilogue, int32_t epiLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Perform the Unicode Bidi algorithm. It is defined in the
+ * Unicode Standard Annex #9,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0.
+ *
+ * This function takes a piece of plain text containing one or more paragraphs,
+ * with or without externally specified embedding levels from styled
+ * text and computes the left-right-directionality of each character.
+ *
+ * If the entire text is all of the same directionality, then
+ * the function may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to
+ * the same value in such a case.
+ *
+ * The text can be composed of multiple paragraphs. Occurrence of a block
+ * separator in the text terminates a paragraph, and whatever comes next starts
+ * a new paragraph. The exception to this rule is when a Carriage Return (CR)
+ * is followed by a Line Feed (LF). Both CR and LF are block separators, but
+ * in that case, the pair of characters is considered as terminating the
+ * preceding paragraph, and a new paragraph will be started by a character
+ * coming after the LF.
+ *
+ * @param pBiDi A UBiDi object allocated with ubidi_open()
+ * which will be set to contain the reordering information,
+ * especially the resolved levels for all the characters in text.
+ *
+ * @param text is a pointer to the text that the Bidi algorithm will be performed on.
+ * This pointer is stored in the UBiDi object and can be retrieved
+ * with ubidi_getText().
+ * Note: the text must be (at least) length long.
+ *
+ * @param length is the length of the text; if length==-1 then
+ * the text must be zero-terminated.
+ *
+ * @param paraLevel specifies the default level for the text;
+ * it is typically 0 (LTR) or 1 (RTL).
+ * If the function shall determine the paragraph level from the text,
+ * then paraLevel can be set to
+ * either #UBIDI_DEFAULT_LTR
+ * or #UBIDI_DEFAULT_RTL; if the text contains multiple
+ * paragraphs, the paragraph level shall be determined separately for
+ * each paragraph; if a paragraph does not include any strongly typed
+ * character, then the desired default is used (0 for LTR or 1 for RTL).
+ * Any other value between 0 and #UBIDI_MAX_EXPLICIT_LEVEL
+ * is also valid, with odd levels indicating RTL.
+ *
+ * @param embeddingLevels (in) may be used to preset the embedding and override levels,
+ * ignoring characters like LRE and PDF in the text.
+ * A level overrides the directional property of its corresponding
+ * (same index) character if the level has the
+ * #UBIDI_LEVEL_OVERRIDE bit set.
+ * Aside from that bit, it must be
+ * paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL,
+ * except that level 0 is always allowed.
+ * Level 0 for a paragraph separator prevents reordering of paragraphs;
+ * this only works reliably if #UBIDI_LEVEL_OVERRIDE
+ * is also set for paragraph separators.
+ * Level 0 for other characters is treated as a wildcard
+ * and is lifted up to the resolved level of the surrounding paragraph.
+ * Caution: A copy of this pointer, not of the levels,
+ * will be stored in the UBiDi object;
+ * the embeddingLevels array must not be
+ * deallocated before the UBiDi structure is destroyed or reused,
+ * and the embeddingLevels
+ * should not be modified to avoid unexpected results on subsequent Bidi operations.
+ * However, the ubidi_setPara() and
+ * ubidi_setLine() functions may modify some or all of the levels.
+ * After the UBiDi object is reused or destroyed, the caller
+ * must take care of the deallocation of the embeddingLevels array.
+ * Note: the embeddingLevels array must be
+ * at least length long.
+ * This pointer can be NULL if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
+ UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
+ UErrorCode *pErrorCode);
+
+/**
+ * ubidi_setLine() sets a UBiDi to
+ * contain the reordering information, especially the resolved levels,
+ * for all the characters in a line of text. This line of text is
+ * specified by referring to a UBiDi object representing
+ * this information for a piece of text containing one or more paragraphs,
+ * and by specifying a range of indexes in this text.
+ * In the new line object, the indexes will range from 0 to limit-start-1.
+ *
+ * This is used after calling ubidi_setPara()
+ * for a piece of text, and after line-breaking on that text.
+ * It is not necessary if each paragraph is treated as a single line.
+ *
+ * After line-breaking, rules (L1) and (L2) for the treatment of
+ * trailing WS and for reordering are performed on
+ * a UBiDi object that represents a line.
+ *
+ * Important: pLineBiDi shares data with
+ * pParaBiDi.
+ * You must destroy or reuse pLineBiDi before pParaBiDi.
+ * In other words, you must destroy or reuse the UBiDi object for a line
+ * before the object for its parent paragraph.
+ *
+ * The text pointer that was stored in
+ *
+ * @param pBiDi is the paragraph or line
+ *
+ * @param pBiDi is the paragraph
+ *
+ * Note that this function may allocate memory under some
+ * circumstances, unlike
+ * This is especially useful for line-breaking on a paragraph.
+ *
+ * @param pBiDi is the paragraph or line
+ *
+ * Use of
+ *
+ * The value returned may be
+ * When the visual output is altered by using options of
+ *
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of
+ *
+ * The value returned may be
+ * This is the inverse function to
+ * When the visual output is altered by using options of
+ *
+ * Some values in the map may be
+ * When the visual output is altered by using options of
+ *
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of
+ * Some values in the map may be
+ * When the visual output is altered by using options of
+ *
+ * The index map will result in
+ * The index map will result in This option does not imply corresponding adjustment of the index
+ * mappings. This option does not imply corresponding adjustment of the index
+ * mappings. This has the same effect as calling Usually, the function pointer will be propagated to a If a This may be useful for assigning Bidi classes to PUA characters, or
+ * for special application needs. For instance, an application may want to
+ * handle all spaces like L or R characters (according to the base direction)
+ * when creating the visual ordering of logical lines which are part of a report
+ * organized in columns: there should not be interaction between adjacent
+ * cells.
+ *
+ * @param pBiDi is the paragraph
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ *
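+ * Note: The locale keyword "lb" can be used to modify line break
+ * behavior according to the CSS level 3 line-break options, see
+ * http://dev.w3.org/csswg/css-text/#line-breaking. For example:
+ * "ja@lb=strict", "zh@lb=loose".
+ *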
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ *
+ * Note: The locale keyword "ss" can be used to enable use of
+ * segmentation suppression data (preventing breaks in English after
+ * abbreviations such as "Mr." or "Est.", for example), as follows:
+ * "en@ss=standard".
+ *
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ *
+ * Character boundary analysis identifies the boundaries of
+ * "Extended Grapheme Clusters", which are groupings of codepoints
+ * that should be treated as character-like units for many text operations.
+ * Please see Unicode Standard Annex #29, Unicode Text Segmentation,
+ * http://www.unicode.org/reports/tr29/ for additional information
+ * on grapheme clusters and guidelines on their use.
+ *
+ * Title boundary analysis locates all positions,
+ * typically starts of words, that should be set to Title Case
+ * when title casing the text.
+ *
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties. These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ *
+ * In addition to the plain C API defined in this header file, an
+ * object oriented C++ API with equivalent functionality is defined in the
+ * file brkiter.h.
+ *
+ * Code snippets illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * https://unicode-org.github.io/icu/userguide/boundaryanalysis/
+ * and in the sample program icu/source/samples/break/break.cpp
+ */
+
+/** The possible types of text boundaries. @stable ICU 2.0 */
+typedef enum UBreakIteratorType {
+ /** Character breaks @stable ICU 2.0 */
+ UBRK_CHARACTER = 0,
+ /** Word breaks @stable ICU 2.0 */
+ UBRK_WORD = 1,
+ /** Line breaks @stable ICU 2.0 */
+ UBRK_LINE = 2,
+ /** Sentence breaks @stable ICU 2.0 */
+ UBRK_SENTENCE = 3,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Title Case breaks
+ * The iterator created using this type locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use Word Boundary iterator.
+ *
+ * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
+ */
+ UBRK_TITLE = 4,
+ /**
+ * One more than the highest normal UBreakIteratorType value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UBRK_COUNT = 5
+#endif // U_HIDE_DEPRECATED_API
+} UBreakIteratorType;
+
+/** Value indicating all text boundaries have been returned.
+ * @stable ICU 2.0
+ */
+#define UBRK_DONE ((int32_t) -1)
+
+
+/**
+ * Enum constants for the word break tags returned by
+ * getRuleStatus(). A range of values is defined for each category of
+ * word, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.2
+*/
+typedef enum UWordBreak {
+ /** Tag value for "words" that do not fit into any of other categories.
+ * Includes spaces and most punctuation. */
+ UBRK_WORD_NONE = 0,
+ /** Upper bound for tags for uncategorized words. */
+ UBRK_WORD_NONE_LIMIT = 100,
+ /** Tag value for words that appear to be numbers, lower limit. */
+ UBRK_WORD_NUMBER = 100,
+ /** Tag value for words that appear to be numbers, upper limit. */
+ UBRK_WORD_NUMBER_LIMIT = 200,
+ /** Tag value for words that contain letters, excluding
+ * hiragana, katakana or ideographic characters, lower limit. */
+ UBRK_WORD_LETTER = 200,
+ /** Tag value for words containing letters, upper limit */
+ UBRK_WORD_LETTER_LIMIT = 300,
+ /** Tag value for words containing kana characters, lower limit */
+ UBRK_WORD_KANA = 300,
+ /** Tag value for words containing kana characters, upper limit */
+ UBRK_WORD_KANA_LIMIT = 400,
+ /** Tag value for words containing ideographic characters, lower limit */
+ UBRK_WORD_IDEO = 400,
+ /** Tag value for words containing ideographic characters, upper limit */
+ UBRK_WORD_IDEO_LIMIT = 500
+} UWordBreak;
+
+/**
+ * Enum constants for the line break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * line break, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.8
+ */
+typedef enum ULineBreakTag {
+ /** Tag value for soft line breaks, positions at which a line break
+ * is acceptable but not required */
+ UBRK_LINE_SOFT = 0,
+ /** Upper bound for soft line breaks. */
+ UBRK_LINE_SOFT_LIMIT = 100,
+ /** Tag value for a hard, or mandatory line break */
+ UBRK_LINE_HARD = 100,
+ /** Upper bound for hard line breaks. */
+ UBRK_LINE_HARD_LIMIT = 200
+} ULineBreakTag;
+
+
+
+/**
+ * Enum constants for the sentence break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * sentence, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.8
+ */
+typedef enum USentenceBreakTag {
+ /** Tag value for sentences ending with a sentence terminator
+ * ('.', '?', '!', etc.) character, possibly followed by a
+ * hard separator (CR, LF, PS, etc.)
+ */
+ UBRK_SENTENCE_TERM = 0,
+ /** Upper bound for tags for sentences ended by sentence terminators. */
+ UBRK_SENTENCE_TERM_LIMIT = 100,
+ /** Tag value for sentences that do not contain an ending
+ * sentence terminator ('.', '?', '!', etc.) character, but
+ * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
+ */
+ UBRK_SENTENCE_SEP = 100,
+ /** Upper bound for tags for sentences ended by a separator. */
+ UBRK_SENTENCE_SEP_LIMIT = 200
+} USentenceBreakTag;
+
+
+/**
+ * Open a new UBreakIterator for locating text boundaries for a specified locale.
+ * A UBreakIterator may be used for detecting character, line, word,
+ * and sentence breaks in text.
+ * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
+ * UBRK_LINE, UBRK_SENTENCE
+ * @param locale The locale specifying the text-breaking conventions. Note that
+ * locale keys such as "lb" and "ss" may be used to modify text break behavior,
+ * see general discussion of BreakIterator C API.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified locale.
+ * @see ubrk_openRules
+ * @stable ICU 2.0
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_open(UBreakIteratorType type,
+ const char *locale,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param parseErr Receives position and context information for any syntax errors
+ * detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @stable ICU 2.2
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openRules(const UChar *rules,
+ int32_t rulesLength,
+ const UChar *text,
+ int32_t textLength,
+ UParseError *parseErr,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
+ * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
+ * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
+ * compatible across different major versions of ICU, nor across platforms of different
+ * endianness or different base character set family (ASCII vs EBCDIC).
+ * @param binaryRules A set of compiled binary rules specifying the text breaking
+ * conventions. Ownership of the storage containing the compiled
+ * rules remains with the caller of this function. The compiled
+ * rules must not be modified or deleted during the life of the
+ * break iterator.
+ * @param rulesLength The length of binaryRules in bytes; must be >= 0.
+ * @param text The text to be iterated over. May be null, in which case
+ * ubrk_setText() is used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status Pointer to UErrorCode to receive any errors.
+ * @return UBreakIterator for the specified rules.
+ * @see ubrk_getBinaryRules
+ * @stable ICU 59
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
+ const UChar * text, int32_t textLength,
+ UErrorCode * status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Thread safe cloning operation.
+ * @param bi iterator to be cloned
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @return pointer to the new clone
+ * @stable ICU 69
+ */
+U_CAPI UBreakIterator * U_EXPORT2
+ubrk_clone(const UBreakIterator *bi,
+ UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone().
+ * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
+ */
+#define U_BRK_SAFECLONE_BUFFERSIZE 1
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Close a UBreakIterator.
+ * Once closed, a UBreakIterator may no longer be used.
+ * @param bi The break iterator to close.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubrk_close(UBreakIterator *bi);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBreakIteratorPointer
+ * "Smart pointer" class, closes a UBreakIterator via ubrk_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Sets an existing iterator to point to a new piece of text.
+ * The break iterator retains a pointer to the supplied text.
+ * The caller must not modify or delete the text while the BreakIterator
+ * retains the reference.
+ *
+ * @param bi The iterator to use
+ * @param text The text to be set
+ * @param textLength The length of the text
+ * @param status The error code
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubrk_setText(UBreakIterator* bi,
+ const UChar* text,
+ int32_t textLength,
+ UErrorCode* status);
+
+/**
+ * Determine the most recently-returned text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
+ * \ref ubrk_first, or \ref ubrk_last.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_current(const UBreakIterator *bi);
+
+/**
+ * Advance the iterator to the boundary following the current boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the next text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_previous
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_next(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the boundary preceding the current boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the preceding text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_next
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_previous(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to zero, the start of the text being scanned.
+ * @param bi The break iterator to use.
+ * @return The new iterator position (zero).
+ * @see ubrk_last
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_first(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the index immediately beyond the last character in the text being scanned.
+ * This is not the same as the last character.
+ * @param bi The break iterator to use.
+ * @return The character offset immediately beyond the last character in the
+ * text being scanned.
+ * @see ubrk_first
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_last(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the first boundary preceding the specified offset.
+ * The new position is always smaller than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary preceding offset, or UBRK_DONE.
+ * @see ubrk_following
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_preceding(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+ * Advance the iterator to the first boundary following the specified offset.
+ * The value returned is always greater than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary following offset, or UBRK_DONE.
+ * @see ubrk_preceding
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_following(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+* Get a locale for which text breaking information is available.
+* A UBreakIterator in a locale returned by this function will perform the correct
+* text breaking for the locale.
+* @param index The index of the desired locale.
+* @return A locale for which text breaking information is available, or 0 if none.
+* @see ubrk_countAvailable
+* @stable ICU 2.0
+*/
+U_CAPI const char* U_EXPORT2
+ubrk_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have text breaking information available.
+* This function is most useful as determining the loop ending condition for
+* calls to \ref ubrk_getAvailable.
+* @return The number of locales for which text breaking information is available.
+* @see ubrk_getAvailable
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+ubrk_countAvailable(void);
+
+
+/**
+* Returns true if the specified position is a boundary position. As a side
+* effect, leaves the iterator pointing to the first boundary position at
+* or after "offset".
+* @param bi The break iterator to use.
+* @param offset the offset to check.
+* @return True if "offset" is a boundary position.
+* @stable ICU 2.0
+*/
+U_CAPI UBool U_EXPORT2
+ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
+
+/**
+ * Return the status from the break rule that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. For rules that do not specify a
+ * status, a default value of 0 is returned.
+ *
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Get the statuses from the break rules that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. The default status value for rules
+ * that do not explicitly provide one is zero.
+ *
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @param bi The break iterator to use
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the most recent boundary returned by the break iterator.
+ * @stable ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @stable ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
+/**
+ * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
+ * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
+ * more quickly than using ubrk_openRules. The compiled rules are not compatible across
+ * different major versions of ICU, nor across platforms of different endianness or
+ * different base character set family (ASCII vs EBCDIC). Supports preflighting (with
+ * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
+ * the binaryRules buffer. However, whether preflighting or not, if the actual length
+ * is greater than INT32_MAX, then the function returns 0 and sets *status to
+ * U_INDEX_OUTOFBOUNDS_ERROR.
+
+ * @param bi The break iterator to use.
+ * @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
+ * preflighting.
+ * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
+ * preflighting. Must be >= 0.
+ * @param status Pointer to UErrorCode to receive any errors, such as
+ * U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual byte length of the binary rules, if <= INT32_MAX;
+ * otherwise 0. If not preflighting and this is larger than
+ * rulesCapacity, *status will be set to an error.
+ * @see ubrk_openBinaryRules
+ * @stable ICU 59
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getBinaryRules(UBreakIterator *bi,
+ uint8_t * binaryRules, int32_t rulesCapacity,
+ UErrorCode * status);
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/ucal.h b/third_party/icu4c/ndk_headers/unicode/ucal.h
new file mode 100644
index 00000000000..300c6c623f4
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/ucal.h
@@ -0,0 +1,1682 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+#ifndef UCAL_H
+#define UCAL_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+#include "unicode/uloc.h"
+
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * \file
+ * \brief C API: Calendar
+ *
+ *
+ * Types of
+ * Like other locale-sensitive C API, calendar API provides a
+ * function,
+ * A
+ * When computing a
+ * Insufficient information. The calendar will use default
+ * information to specify the missing fields. This may vary by calendar; for
+ * the Gregorian calendar, the default for a field is the same as that of the
+ * start of the epoch: i.e., UCAL_YEAR = 1970, UCAL_MONTH = JANUARY, UCAL_DATE = 1, etc.
+ *
+ *
+ * Inconsistent information. If fields conflict, the calendar
+ * will give preference to fields set more recently. For example, when
+ * determining the day, the calendar will look for one of the following
+ * combinations of fields. The most recent combination, as determined by the
+ * most recently set single field, will be used.
+ *
+ * \htmlonly
+ * Note: for some non-Gregorian calendars, different
+ * fields may be necessary for complete disambiguation. For example, a full
+ * specification of the historical Arabic astronomical calendar requires year,
+ * month, day-of-month and day-of-week in some cases.
+ *
+ *
+ * Note: There are certain possible ambiguities in
+ * interpretation of certain singular times, which are resolved in the
+ * following ways:
+ *
+ * The date or time format strings are not part of the definition of a
+ * calendar, as those must be modifiable or overridable by the user at
+ * runtime. Use {@link icu::DateFormat}
+ * to format dates.
+ *
+ *
+ *
+ *
+ * The Japanese calendar uses a combination of era name and year number.
+ * When an emperor of Japan abdicates and a new emperor ascends the throne,
+ * a new era is declared and year number is reset to 1. Even if the date of
+ * abdication is scheduled ahead of time, the new era name might not be
+ * announced until just before the date. In such case, ICU4C may include
+ * a start date of future era without actual era name, but not enabled
+ * by default. ICU4C users who want to test the behavior of the future era
+ * can enable the tentative era by:
+ * This function is not thread safe.
+ * Note: When unknown TimeZone ID is specified or if the TimeZone ID specified is "Etc/Unknown",
+ * the UCalendar returned by the function is initialized with GMT zone with TimeZone ID
+ * There are system time zones that cannot be mapped to Windows zones. When the input
+* system time zone ID is unknown or unmappable to a Windows time zone, then this
+* function returns 0 as the result length, but the operation itself remains successful
+* (no error status set on return).
+*
+* This implementation utilizes
+* Zone-Tzid mapping data. The mapping data is updated time to time. To get the latest changes,
+* please read the ICU user guide section
+* Updating the Time Zone Data.
+*
+* @param id A system time zone ID.
+* @param len The length of Not all Windows time zones can be mapped to system time zones. When the input
+* Windows time zone ID is unknown or unmappable to a system time zone, then this
+* function returns 0 as the result length, but the operation itself remains successful
+* (no error status set on return).
+*
+* This implementation utilizes
+* Zone-Tzid mapping data. The mapping data is updated time to time. To get the latest changes,
+* please read the ICU user guide section
+* Updating the Time Zone Data.
+*
+* @param winid A Windows time zone ID.
+* @param len The length of
+ * The
+ * If the digit is less than 10, then
+ *
+ * For more information about the collation service see
+ * the User Guide.
+ *
+ * Collation service provides correct sorting orders for most locales supported in ICU.
+ * If specific data for a locale is not available, the orders eventually falls back
+ * to the CLDR root sort order.
+ *
+ * Sort ordering may be customized by providing your own set of rules. For more on
+ * this subject see the
+ * Collation Customization section of the User Guide.
+ *
+ * @see UCollationResult
+ * @see UNormalizationMode
+ * @see UCollationStrength
+ * @see UCollationElements
+ */
+
+/** A collator.
+* For usage in C programs.
+*/
+struct UCollator;
+/** structure representing a collator object instance
+ * @stable ICU 2.0
+ */
+typedef struct UCollator UCollator;
+
+
+/**
+ * UCOL_LESS is returned if source string is compared to be less than target
+ * string in the ucol_strcoll() method.
+ * UCOL_EQUAL is returned if source string is compared to be equal to target
+ * string in the ucol_strcoll() method.
+ * UCOL_GREATER is returned if source string is compared to be greater than
+ * target string in the ucol_strcoll() method.
+ * @see ucol_strcoll()
+ *
+ * Possible values for a comparison result
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** string a == string b */
+ UCOL_EQUAL = 0,
+ /** string a > string b */
+ UCOL_GREATER = 1,
+ /** string a < string b */
+ UCOL_LESS = -1
+} UCollationResult ;
+
+
+/** Enum containing attribute values for controlling collation behavior.
+ * Here are all the allowable values. Not every attribute can take every value. The only
+ * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined
+ * value for that locale
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** accepted by most attributes */
+ UCOL_DEFAULT = -1,
+
+ /** Primary collation strength */
+ UCOL_PRIMARY = 0,
+ /** Secondary collation strength */
+ UCOL_SECONDARY = 1,
+ /** Tertiary collation strength */
+ UCOL_TERTIARY = 2,
+ /** Default collation strength */
+ UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
+ UCOL_CE_STRENGTH_LIMIT,
+ /** Quaternary collation strength */
+ UCOL_QUATERNARY=3,
+ /** Identical collation strength */
+ UCOL_IDENTICAL=15,
+ UCOL_STRENGTH_LIMIT,
+
+ /** Turn the feature off - works for UCOL_FRENCH_COLLATION,
+ UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
+ & UCOL_DECOMPOSITION_MODE*/
+ UCOL_OFF = 16,
+ /** Turn the feature on - works for UCOL_FRENCH_COLLATION,
+ UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
+ & UCOL_DECOMPOSITION_MODE*/
+ UCOL_ON = 17,
+
+ /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
+ UCOL_SHIFTED = 20,
+ /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
+ UCOL_NON_IGNORABLE = 21,
+
+ /** Valid for UCOL_CASE_FIRST -
+ lower case sorts before upper case */
+ UCOL_LOWER_FIRST = 24,
+ /** upper case sorts before lower case */
+ UCOL_UPPER_FIRST = 25,
+} UColAttributeValue;
+
+/**
+ * Enum containing the codes for reordering segments of the collation table that are not script
+ * codes. These reordering codes are to be used in conjunction with the script codes.
+ * @see ucol_getReorderCodes
+ * @see ucol_setReorderCodes
+ * @see ucol_getEquivalentReorderCodes
+ * @see UScriptCode
+ * @stable ICU 4.8
+ */
+ typedef enum {
+ /**
+ * A special reordering code that is used to specify the default
+ * reordering codes for a locale.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_DEFAULT = -1,
+ /**
+ * A special reordering code that is used to specify no reordering codes.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN,
+ /**
+ * A special reordering code that is used to specify all other codes used for
+ * reordering except for the codes listed as UColReorderCode values and those
+ * listed explicitly in a reordering.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN,
+ /**
+ * Characters with the space property.
+ * This is equivalent to the rule value "space".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_SPACE = 0x1000,
+ /**
+ * The first entry in the enumeration of reordering groups. This is intended for use in
+ * range checking and enumeration of the reorder codes.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
+ /**
+ * Characters with the punctuation property.
+ * This is equivalent to the rule value "punct".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
+ /**
+ * Characters with the symbol property.
+ * This is equivalent to the rule value "symbol".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_SYMBOL = 0x1002,
+ /**
+ * Characters with the currency property.
+ * This is equivalent to the rule value "currency".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_CURRENCY = 0x1003,
+ /**
+ * Characters with the digit property.
+ * This is equivalent to the rule value "digit".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_DIGIT = 0x1004,
+} UColReorderCode;
+
+/**
+ * Base letter represents a primary difference. Set comparison
+ * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
+ * Use this to set the strength of a Collator object.
+ * Example of primary difference, "abc" < "abd"
+ *
+ * Diacritical differences on the same base letter represent a secondary
+ * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary
+ * differences. Use this to set the strength of a Collator object.
+ * Example of secondary difference, "ä" >> "a".
+ *
+ * Uppercase and lowercase versions of the same character represents a
+ * tertiary difference. Set comparison level to UCOL_TERTIARY to include
+ * all comparison differences. Use this to set the strength of a Collator
+ * object.
+ * Example of tertiary difference, "abc" <<< "ABC".
+ *
+ * Two characters are considered "identical" when they have the same
+ * unicode spellings. UCOL_IDENTICAL.
+ * For example, "ä" == "ä".
+ *
+ * UCollationStrength is also used to determine the strength of sort keys
+ * generated from UCollator objects
+ * These values can be now found in the UColAttributeValue enum.
+ * @stable ICU 2.0
+ **/
+typedef UColAttributeValue UCollationStrength;
+
+/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT
+ * value, as well as the values specific to each one.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** Attribute for direction of secondary weights - used in Canadian French.
+ * Acceptable values are UCOL_ON, which results in secondary weights
+ * being considered backwards and UCOL_OFF which treats secondary
+ * weights in the order they appear.
+ * @stable ICU 2.0
+ */
+ UCOL_FRENCH_COLLATION,
+ /** Attribute for handling variable elements.
+ * Acceptable values are UCOL_NON_IGNORABLE (default)
+ * which treats all the codepoints with non-ignorable
+ * primary weights in the same way,
+ * and UCOL_SHIFTED which causes codepoints with primary
+ * weights that are equal or below the variable top value
+ * to be ignored on primary level and moved to the quaternary
+ * level.
+ * @stable ICU 2.0
+ */
+ UCOL_ALTERNATE_HANDLING,
+ /** Controls the ordering of upper and lower case letters.
+ * Acceptable values are UCOL_OFF (default), which orders
+ * upper and lower case letters in accordance to their tertiary
+ * weights, UCOL_UPPER_FIRST which forces upper case letters to
+ * sort before lower case letters, and UCOL_LOWER_FIRST which does
+ * the opposite.
+ * @stable ICU 2.0
+ */
+ UCOL_CASE_FIRST,
+ /** Controls whether an extra case level (positioned before the third
+ * level) is generated or not. Acceptable values are UCOL_OFF (default),
+ * when case level is not generated, and UCOL_ON which causes the case
+ * level to be generated. Contents of the case level are affected by
+ * the value of UCOL_CASE_FIRST attribute. A simple way to ignore
+ * accent differences in a string is to set the strength to UCOL_PRIMARY
+ * and enable case level.
+ * @stable ICU 2.0
+ */
+ UCOL_CASE_LEVEL,
+ /** Controls whether the normalization check and necessary normalizations
+ * are performed. When set to UCOL_OFF (default) no normalization check
+ * is performed. The correctness of the result is guaranteed only if the
+ * input data is in so-called FCD form (see users manual for more info).
+ * When set to UCOL_ON, an incremental check is performed to see whether
+ * the input data is in the FCD form. If the data is not in the FCD form,
+ * incremental NFD normalization is performed.
+ * @stable ICU 2.0
+ */
+ UCOL_NORMALIZATION_MODE,
+ /** An alias for UCOL_NORMALIZATION_MODE attribute.
+ * @stable ICU 2.0
+ */
+ UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
+ /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
+ * for most locales (except Japanese) is tertiary.
+ *
+ * Quaternary strength
+ * is useful when combined with shifted setting for alternate handling
+ * attribute and for JIS X 4061 collation, when it is used to distinguish
+ * between Katakana and Hiragana.
+ * Otherwise, quaternary level
+ * is affected only by the number of non-ignorable code points in
+ * the string.
+ *
+ * Identical strength is rarely useful, as it amounts
+ * to codepoints of the NFD form of the string.
+ * @stable ICU 2.0
+ */
+ UCOL_STRENGTH,
+ /**
+ * When turned on, this attribute makes
+ * substrings of digits sort according to their numeric values.
+ *
+ * This is a way to get '100' to sort AFTER '2'. Note that the longest
+ * digit substring that can be treated as a single unit is
+ * 254 digits (not counting leading zeros). If a digit substring is
+ * longer than that, the digits beyond the limit will be treated as a
+ * separate digit substring.
+ *
+ * A "digit" in this sense is a code point with General_Category=Nd,
+ * which does not include circled numbers, roman numerals, etc.
+ * Only a contiguous digit substring is considered, that is,
+ * non-negative integers without separators.
+ * There is no support for plus/minus signs, decimals, exponents, etc.
+ *
+ * @stable ICU 2.8
+ */
+ UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
+
+ /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API,
+ * it is needed for layout of RuleBasedCollator object. */
+} UColAttribute;
+
+/** Options for retrieving the rule string
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /**
+ * Retrieves the tailoring rules only.
+ * Same as calling the version of getRules() without UColRuleOption.
+ * @stable ICU 2.0
+ */
+ UCOL_TAILORING_ONLY,
+ /**
+ * Retrieves the "UCA rules" concatenated with the tailoring rules.
+ * The "UCA rules" are an approximation of the root collator's sort order.
+ * They are almost never used or useful at runtime and can be removed from the data.
+ * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
+ * @stable ICU 2.0
+ */
+ UCOL_FULL_RULES
+} UColRuleOption ;
+
+/**
+ * Open a UCollator for comparing strings.
+ *
+ * For some languages, multiple collation types are available;
+ * for example, "de@collation=phonebook".
+ * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
+ * in the old locale extension syntax ("el@colCaseFirst=upper")
+ * or in language tag syntax ("el-u-kf-upper").
+ * See User Guide: Collation API.
+ *
+ * The UCollator pointer is used in all the calls to the Collation
+ * service. After finished, collator must be disposed of by calling
+ * {@link #ucol_close }.
+ * @param loc The locale containing the required collation rules.
+ * Special values for locales can be passed in -
+ * if NULL is passed for the locale, the default locale
+ * collation rules will be used. If empty string ("") or
+ * "root" are passed, the root collator will be returned.
+ * @param status A pointer to a UErrorCode to receive any errors
+ * @return A pointer to a UCollator, or 0 if an error occurred.
+ * @see ucol_openRules
+ * @see ucol_clone
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_open(const char *loc, UErrorCode *status);
+
+/**
+ * Produce a UCollator instance according to the rules supplied.
+ * The rules are used to change the default ordering, defined in the
+ * UCA in a process called tailoring. The resulting UCollator pointer
+ * can be used in the same way as the one obtained by {@link #ucol_open }.
+ * @param rules A string describing the collation rules. For the syntax
+ * of the rules please see users guide.
+ * @param rulesLength The length of rules, or -1 if null-terminated.
+ * @param normalizationMode The normalization mode: One of
+ * UCOL_OFF (expect the text to not need normalization),
+ * UCOL_ON (normalize), or
+ * UCOL_DEFAULT (set the mode according to the rules)
+ * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
+ * @param parseError A pointer to UParseError to receive information about errors
+ * that occurred during parsing. This argument can currently be set
+ * to NULL, but at users own risk. Please provide a real structure.
+ * @param status A pointer to a UErrorCode to receive any errors
+ * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
+ * of error - please use status argument to check for errors.
+ * @see ucol_open
+ * @see ucol_clone
+ * @see ucol_close
+ * @stable ICU 2.0
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_openRules( const UChar *rules,
+ int32_t rulesLength,
+ UColAttributeValue normalizationMode,
+ UCollationStrength strength,
+ UParseError *parseError,
+ UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Get a set containing the expansions defined by the collator. The set includes
+ * both the root collator's expansions and the expansions defined by the tailoring
+ * @param coll collator
+ * @param contractions if not NULL, the set to hold the contractions
+ * @param expansions if not NULL, the set to hold the expansions
+ * @param addPrefixes add the prefix contextual elements to contractions
+ * @param status to hold the error code
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ucol_getContractionsAndExpansions( const UCollator *coll,
+ USet *contractions, USet *expansions,
+ UBool addPrefixes, UErrorCode *status);
+
+/**
+ * Close a UCollator.
+ * Once closed, a UCollator should not be used. Every open collator should
+ * be closed. Otherwise, a memory leak will result.
+ * @param coll The UCollator to close.
+ * @see ucol_open
+ * @see ucol_openRules
+ * @see ucol_clone
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucol_close(UCollator *coll);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCollatorPointer
+ * "Smart pointer" class, closes a UCollator via ucol_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Compare two strings.
+ * The strings will be compared using the options already specified.
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return The result of comparing the strings; one of UCOL_EQUAL,
+ * UCOL_GREATER, UCOL_LESS
+ * @see ucol_greater
+ * @see ucol_greaterOrEqual
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcoll( const UCollator *coll,
+ const UChar *source,
+ int32_t sourceLength,
+ const UChar *target,
+ int32_t targetLength);
+
+/**
+* Compare two strings in UTF-8.
+* The strings will be compared using the options already specified.
+* Note: When an input string contains a malformed UTF-8 byte sequence,
+* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
+* @param coll The UCollator containing the comparison rules.
+* @param source The source UTF-8 string.
+* @param sourceLength The length of source, or -1 if null-terminated.
+* @param target The target UTF-8 string.
+* @param targetLength The length of target, or -1 if null-terminated.
+* @param status A pointer to a UErrorCode to receive any errors
+* @return The result of comparing the strings; one of UCOL_EQUAL,
+* UCOL_GREATER, UCOL_LESS
+* @see ucol_greater
+* @see ucol_greaterOrEqual
+* @see ucol_equal
+* @stable ICU 50
+*/
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcollUTF8(
+ const UCollator *coll,
+ const char *source,
+ int32_t sourceLength,
+ const char *target,
+ int32_t targetLength,
+ UErrorCode *status);
+
+/**
+ * Determine if one string is greater than another.
+ * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return true if source is greater than target, false otherwise.
+ * @see ucol_strcoll
+ * @see ucol_greaterOrEqual
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucol_greater(const UCollator *coll,
+ const UChar *source, int32_t sourceLength,
+ const UChar *target, int32_t targetLength);
+
+/**
+ * Determine if one string is greater than or equal to another.
+ * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return true if source is greater than or equal to target, false otherwise.
+ * @see ucol_strcoll
+ * @see ucol_greater
+ * @see ucol_equal
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucol_greaterOrEqual(const UCollator *coll,
+ const UChar *source, int32_t sourceLength,
+ const UChar *target, int32_t targetLength);
+
+/**
+ * Compare two strings for equality.
+ * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
+ * @param coll The UCollator containing the comparison rules.
+ * @param source The source string.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param target The target string.
+ * @param targetLength The length of target, or -1 if null-terminated.
+ * @return true if source is equal to target, false otherwise
+ * @see ucol_strcoll
+ * @see ucol_greater
+ * @see ucol_greaterOrEqual
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucol_equal(const UCollator *coll,
+ const UChar *source, int32_t sourceLength,
+ const UChar *target, int32_t targetLength);
+
+/**
+ * Compare two strings supplied through UCharIterators.
+ * The strings will be compared using the options already specified.
+ * @param coll The UCollator containing the comparison rules.
+ * @param sIter The source string iterator.
+ * @param tIter The target string iterator.
+ * @param status A pointer to a UErrorCode to receive any errors
+ * @return The result of comparing the strings; one of UCOL_EQUAL,
+ * UCOL_GREATER, UCOL_LESS
+ * @see ucol_strcoll
+ * @stable ICU 2.6
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcollIter( const UCollator *coll,
+ UCharIterator *sIter,
+ UCharIterator *tIter,
+ UErrorCode *status);
+
+/**
+ * Get the collation strength used in a UCollator.
+ * The strength influences how strings are compared.
+ * @param coll The UCollator to query.
+ * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
+ * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
+ * @see ucol_setStrength
+ * @stable ICU 2.0
+ */
+U_CAPI UCollationStrength U_EXPORT2
+ucol_getStrength(const UCollator *coll);
+
+/**
+ * Set the collation strength used in a UCollator.
+ * The strength influences how strings are compared.
+ * @param coll The UCollator to set.
+ * @param strength The desired collation strength; one of UCOL_PRIMARY,
+ * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
+ * @see ucol_getStrength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucol_setStrength(UCollator *coll,
+ UCollationStrength strength);
+
+/**
+ * Retrieves the reordering codes for this collator.
+ * These reordering codes are a combination of UScript codes and UColReorderCode entries.
+ * @param coll The UCollator to query.
+ * @param dest The array to fill with the script ordering.
+ * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
+ * will only return the length of the result without writing any codes (pre-flighting).
+ * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
+ * failure before the function call.
+ * @return The number of reordering codes written to the dest array.
+ * @see ucol_setReorderCodes
+ * @see ucol_getEquivalentReorderCodes
+ * @see UScriptCode
+ * @see UColReorderCode
+ * @stable ICU 4.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getReorderCodes(const UCollator* coll,
+ int32_t* dest,
+ int32_t destCapacity,
+ UErrorCode *pErrorCode);
+/**
+ * Sets the reordering codes for this collator.
+ * Collation reordering allows scripts and some other groups of characters
+ * to be moved relative to each other. This reordering is done on top of
+ * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
+ * at the start and/or the end of the collation order. These groups are specified using
+ * UScript codes and UColReorderCode entries.
+ *
+ * By default, reordering codes specified for the start of the order are placed in the
+ * order given after several special non-script blocks. These special groups of characters
+ * are space, punctuation, symbol, currency, and digit. These special groups are represented with
+ * UColReorderCode entries. Script groups can be intermingled with
+ * these special non-script groups if those special groups are explicitly specified in the reordering.
+ *
+ * The special code OTHERS stands for any script that is not explicitly
+ * mentioned in the list of reordering codes given. Anything that is after OTHERS
+ * will go at the very end of the reordering in the order given.
+ *
+ * The special reorder code DEFAULT will reset the reordering for this collator
+ * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
+ * was specified when this collator was created from resource data or from rules. The
+ * DEFAULT code must be the sole code supplied when it is used.
+ * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
+ *
+ * The special reorder code NONE will remove any reordering for this collator.
+ * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
+ * NONE code must be the sole code supplied when it is used.
+ *
+ * @param coll The UCollator to set.
+ * @param reorderCodes An array of script codes in the new order. This can be NULL if the
+ * length is also set to 0. An empty array will clear any reordering codes on the collator.
+ * @param reorderCodesLength The length of reorderCodes.
+ * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
+ * failure before the function call.
+ * @see ucol_getReorderCodes
+ * @see ucol_getEquivalentReorderCodes
+ * @see UScriptCode
+ * @see UColReorderCode
+ * @stable ICU 4.8
+ */
+U_CAPI void U_EXPORT2
+ucol_setReorderCodes(UCollator* coll,
+ const int32_t* reorderCodes,
+ int32_t reorderCodesLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
+ * codes will be grouped and must reorder together.
+ * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
+ * for example Hiragana and Katakana.
+ *
+ * @param reorderCode The reorder code to determine equivalence for.
+ * @param dest The array to fill with the script ordering.
+ * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
+ * will only return the length of the result without writing any codes (pre-flighting).
+ * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
+ * a failure before the function call.
+ * @return The number of reordering codes written to the dest array.
+ * @see ucol_setReorderCodes
+ * @see ucol_getReorderCodes
+ * @see UScriptCode
+ * @see UColReorderCode
+ * @stable ICU 4.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getEquivalentReorderCodes(int32_t reorderCode,
+ int32_t* dest,
+ int32_t destCapacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get the display name for a UCollator.
+ * The display name is suitable for presentation to a user.
+ * @param objLoc The locale of the collator in question.
+ * @param dispLoc The locale for display.
+ * @param result A pointer to a buffer to receive the display name.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to a UErrorCode to receive any errors
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getDisplayName( const char *objLoc,
+ const char *dispLoc,
+ UChar *result,
+ int32_t resultLength,
+ UErrorCode *status);
+
+/**
+ * Get a locale for which collation rules are available.
+ * A UCollator in a locale returned by this function will perform the correct
+ * collation for the locale.
+ * @param localeIndex The index of the desired locale.
+ * @return A locale for which collation rules are available, or 0 if none.
+ * @see ucol_countAvailable
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+ucol_getAvailable(int32_t localeIndex);
+
+/**
+ * Determine how many locales have collation rules available.
+ * This function is most useful for determining the loop ending condition for
+ * calls to {@link #ucol_getAvailable }.
+ * @return The number of locales for which collation rules are available.
+ * @see ucol_getAvailable
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_countAvailable(void);
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * Create a string enumerator of all locales for which a valid
+ * collator may be opened.
+ * @param status input-output error code
+ * @return a string enumeration over locale strings. The caller is
+ * responsible for closing the result.
+ * @stable ICU 3.0
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucol_openAvailableLocales(UErrorCode *status);
+#endif
+
+/**
+ * Create a string enumerator of all possible keywords that are relevant to
+ * collation. At this point, the only recognized keyword for this
+ * service is "collation".
+ * @param status input-output error code
+ * @return a string enumeration over keyword strings. The caller is
+ * responsible for closing the result.
+ * @stable ICU 3.0
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucol_getKeywords(UErrorCode *status);
+
+/**
+ * Given a keyword, create a string enumeration of all values
+ * for that keyword that are currently in use.
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords. If any other keyword is passed in, *status is set
+ * to U_ILLEGAL_ARGUMENT_ERROR.
+ * @param status input-output error code
+ * @return a string enumeration over collation keyword values, or NULL
+ * upon error. The caller is responsible for closing the result.
+ * @stable ICU 3.0
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucol_getKeywordValues(const char *keyword, UErrorCode *status);
+
+/**
+ * Given a key and a locale, returns an array of string values in a preferred
+ * order that would make a difference. These are all and only those values where
+ * the open (creation) of the service with the locale formed from the input locale
+ * plus input keyword and that value has different behavior than creation with the
+ * input locale alone.
+ * @param key one of the keys supported by this service. For now, only
+ * "collation" is supported.
+ * @param locale the locale
+ * @param commonlyUsed if set to true it will return only commonly used values
+ * with the given locale in preferred order. Otherwise,
+ * it will return all the available values for the locale.
+ * @param status error status
+ * @return a string enumeration over keyword values for the given key and the locale.
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucol_getKeywordValuesForLocale(const char* key,
+ const char* locale,
+ UBool commonlyUsed,
+ UErrorCode* status);
+
+/**
+ * Return the functionally equivalent locale for the specified
+ * input locale, with respect to given keyword, for the
+ * collation service. If two different input locale + keyword
+ * combinations produce the same result locale, then collators
+ * instantiated for these two different input locales will behave
+ * equivalently. The converse is not always true; two collators
+ * may in fact be equivalent, but return different results, due to
+ * internal details. The return result has no other meaning than
+ * that stated above, and implies nothing as to the relationship
+ * between the two locales. This is intended for use by
+ * applications who wish to cache collators, or otherwise reuse
+ * collators when possible. The functional equivalent may change
+ * over time. For more information, please see the
+ * Locales and Services section of the ICU User Guide.
+ * @param result fillin for the functionally equivalent result locale
+ * @param resultCapacity capacity of the fillin buffer
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords.
+ * @param locale the specified input locale
+ * @param isAvailable if non-NULL, pointer to a fillin parameter that
+ * on return indicates whether the specified input locale was 'available'
+ * to the collation service. A locale is defined as 'available' if it
+ * physically exists within the collation locale data.
+ * @param status pointer to input-output error code
+ * @return the actual buffer size needed for the locale. If greater
+ * than resultCapacity, the returned full name will be truncated and
+ * an error code will be returned.
+ * @stable ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
+ const char* keyword, const char* locale,
+ UBool* isAvailable, UErrorCode* status);
+
+/**
+ * Get the collation tailoring rules from a UCollator.
+ * The rules will follow the rule syntax.
+ * @param coll The UCollator to query.
+ * @param length Output parameter; receives the length of the returned rules.
+ * @return The collation tailoring rules.
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ucol_getRules( const UCollator *coll,
+ int32_t *length);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#endif /* U_HIDE_DEPRECATED_API */
+/**
+ * Get a sort key for a string from a UCollator.
+ * Sort keys may be compared using strcmp.
+ *
+ * Note that sort keys are often less efficient than simply doing comparison.
+ * For more details, see the ICU User Guide.
+ *
+ * Like ICU functions that write to an output buffer, the buffer contents
+ * is undefined if the buffer capacity (resultLength parameter) is too small.
+ * Unlike ICU functions that write a string to an output buffer,
+ * the terminating zero byte is counted in the sort key length.
+ * @param coll The UCollator containing the collation rules.
+ * @param source The string to transform.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * @param result A pointer to a buffer to receive the sort key.
+ * @param resultLength The maximum size of result.
+ * @return The size needed to fully store the sort key.
+ * If there was an internal error generating the sort key,
+ * a zero value is returned.
+ * @see ucol_keyHashCode
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getSortKey(const UCollator *coll,
+ const UChar *source,
+ int32_t sourceLength,
+ uint8_t *result,
+ int32_t resultLength);
+
+
+/** Gets the next count bytes of a sort key. Caller needs
+ * to preserve state array between calls and to provide
+ * the same type of UCharIterator set with the same string.
+ * The destination buffer provided must be big enough to store
+ * the number of requested bytes.
+ *
+ * The generated sort key may or may not be compatible with
+ * sort keys generated using ucol_getSortKey().
+ * @param coll The UCollator containing the collation rules.
+ * @param iter UCharIterator containing the string we need
+ * the sort key to be calculated for.
+ * @param state Opaque state of sortkey iteration.
+ * @param dest Buffer to hold the resulting sortkey part
+ * @param count number of sort key bytes required.
+ * @param status error code indicator.
+ * @return the actual number of bytes of a sortkey. It can be
+ * smaller than count if we have reached the end of
+ * the sort key.
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_nextSortKeyPart(const UCollator *coll,
+ UCharIterator *iter,
+ uint32_t state[2],
+ uint8_t *dest, int32_t count,
+ UErrorCode *status);
+
+/** Bound modes accepted by the ucol_getBound API.
+ * See below for explanation.
+ * Do not change the values assigned to the
+ * members of this enum; underlying code
+ * depends on them having these numbers.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** lower bound */
+ UCOL_BOUND_LOWER = 0,
+ /** upper bound that will match strings of exact size */
+ UCOL_BOUND_UPPER = 1,
+ /** upper bound that will match all the strings that have the same initial substring as the given string */
+ UCOL_BOUND_UPPER_LONG = 2,
+} UColBoundMode;
+
+/**
+ * Produce a bound for a given sortkey and a number of levels.
+ * Return value is always the number of bytes needed, regardless of
+ * whether the result buffer was big enough or even valid.
+ * Date Format helps you to format and parse dates for any locale. Your code can
+ * be completely independent of the locale conventions for months, days of the
+ * week, or even the calendar format: lunar vs. solar.
+ *
+ * To format a date for the current Locale with default time and date style,
+ * use one of the static factory methods:
+ *
+ * You can also use forms of the parse and format methods with Parse Position and
+ * UFieldPosition to allow you to
+ * Date and Time Patterns: Date and time formats are specified by date and time pattern strings.
+ * Within date and time pattern strings, all unquoted ASCII letters [A-Za-z] are reserved
+ * as pattern letters representing calendar fields.
+* Note that the normal date formats associated with some calendars - such
+* as the Chinese lunar calendar - do not specify enough fields to enable
+* dates to be parsed unambiguously. In the case of the Chinese lunar
+* calendar, while the year within the current 60-year cycle is specified,
+* the number of such cycles since the start date of the calendar (in the
+* UCAL_ERA field of the UCalendar object) is not normally part of the format,
+* and parsing may assume the wrong era. For cases such as this it is
+* recommended that clients parse using udat_parseCalendar with the UCalendar
+* passed in set to the current date, or to a date within the era/cycle that
+* should be assumed if absent in the format.
+*
+* @param format The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed date/time
+* @see udat_format
+* @stable ICU 2.0
+*/
+U_CAPI UDate U_EXPORT2
+udat_parse(const UDateFormat* format,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos,
+ UErrorCode *status);
+
+/**
+* Parse a string into a date/time using a UDateFormat.
+* The date will be parsed using the conventions specified in {@link #udat_open }.
+* @param format The formatter to use.
+* @param calendar A calendar set on input to the date and time to be used for
+* missing values in the date/time string being parsed, and set
+* on output to the parsed date/time. When the calendar type is
+* different from the internal calendar held by the UDateFormat
+* instance, the internal calendar will be cloned to a work
+* calendar set to the same milliseconds and time zone as this
+* calendar parameter, field values will be parsed based on the
+* work calendar, then the result (milliseconds and time zone)
+* will be set in this calendar.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @see udat_format
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+udat_parseCalendar(const UDateFormat* format,
+ UCalendar* calendar,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos,
+ UErrorCode *status);
+
+/**
+* Determine if an UDateFormat will perform lenient parsing.
+* With lenient parsing, the parser may use heuristics to interpret inputs that do not
+* precisely match the pattern. With strict parsing, inputs must match the pattern.
+* @param fmt The formatter to query
+* @return true if fmt is set to perform lenient parsing, false otherwise.
+* @see udat_setLenient
+* @stable ICU 2.0
+*/
+U_CAPI UBool U_EXPORT2
+udat_isLenient(const UDateFormat* fmt);
+
+/**
+* Specify whether an UDateFormat will perform lenient parsing.
+* With lenient parsing, the parser may use heuristics to interpret inputs that do not
+* precisely match the pattern. With strict parsing, inputs must match the pattern.
+* @param fmt The formatter to set
+* @param isLenient true if fmt should perform lenient parsing, false otherwise.
+* @see udat_isLenient
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+udat_setLenient( UDateFormat* fmt,
+ UBool isLenient);
+
+/**
+* Get the UCalendar associated with an UDateFormat.
+* A UDateFormat uses a UCalendar to convert a raw value to, for example,
+* the day of the week.
+* @param fmt The formatter to query.
+* @return A pointer to the UCalendar used by fmt.
+* @see udat_setCalendar
+* @stable ICU 2.0
+*/
+U_CAPI const UCalendar* U_EXPORT2
+udat_getCalendar(const UDateFormat* fmt);
+
+/**
+* Set the UCalendar associated with an UDateFormat.
+* A UDateFormat uses a UCalendar to convert a raw value to, for example,
+* the day of the week.
+* @param fmt The formatter to set.
+* @param calendarToSet A pointer to an UCalendar to be used by fmt.
+* @see udat_getCalendar
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+udat_setCalendar( UDateFormat* fmt,
+ const UCalendar* calendarToSet);
+
+/**
+* Get the UNumberFormat associated with an UDateFormat.
+* A UDateFormat uses a UNumberFormat to format numbers within a date,
+* for example the day number.
+* @param fmt The formatter to query.
+* @return A pointer to the UNumberFormat used by fmt to format numbers.
+* @see udat_setNumberFormat
+* @stable ICU 2.0
+*/
+U_CAPI const UNumberFormat* U_EXPORT2
+udat_getNumberFormat(const UDateFormat* fmt);
+
+/**
+* Get the UNumberFormat for specific field associated with an UDateFormat.
+* For example: 'y' for year and 'M' for month
+* @param fmt The formatter to query.
+* @param field the field to query
+* @return A pointer to the UNumberFormat used by fmt to format field numbers.
+* @see udat_adoptNumberFormatForFields
+* @stable ICU 54
+*/
+U_CAPI const UNumberFormat* U_EXPORT2
+udat_getNumberFormatForField(const UDateFormat* fmt, UChar field);
+
+/**
+* Set the UNumberFormat for specific field associated with an UDateFormat.
+* It can be a single field like: "y"(year) or "M"(month)
+* It can be several field combined together: "yM"(year and month)
+* Note:
+* 1 symbol field is enough for multiple symbol field (so "y" will override "yy", "yyy")
+* If the field is not numeric, then override has no effect (like "MMM" will use abbreviation, not numerical field)
+*
+* @param fields the fields to set
+* @param fmt The formatter to set.
+* @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt to format numbers.
+* @param status error code passed around (memory allocation or invalid fields)
+* @see udat_getNumberFormatForField
+* @stable ICU 54
+*/
+U_CAPI void U_EXPORT2
+udat_adoptNumberFormatForFields( UDateFormat* fmt,
+ const UChar* fields,
+ UNumberFormat* numberFormatToSet,
+ UErrorCode* status);
+/**
+* Set the UNumberFormat associated with an UDateFormat.
+* A UDateFormat uses a UNumberFormat to format numbers within a date,
+* for example the day number.
+* This method also clears per-field NumberFormat instances previously
+* set by {@link #udat_adoptNumberFormatForFields }.
+* @param fmt The formatter to set.
+* @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt to format numbers.
+* @see udat_getNumberFormat
+* @see udat_adoptNumberFormatForFields
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+udat_setNumberFormat( UDateFormat* fmt,
+ const UNumberFormat* numberFormatToSet);
+/**
+* Adopt the UNumberFormat associated with an UDateFormat.
+* A UDateFormat uses a UNumberFormat to format numbers within a date,
+* for example the day number.
+* @param fmt The formatter to set.
+* @param numberFormatToAdopt A pointer to the UNumberFormat to be used by fmt to format numbers.
+* @see udat_getNumberFormat
+* @stable ICU 54
+*/
+U_CAPI void U_EXPORT2
+udat_adoptNumberFormat( UDateFormat* fmt,
+ UNumberFormat* numberFormatToAdopt);
+/**
+* Get a locale for which date/time formatting patterns are available.
+* A UDateFormat in a locale returned by this function will perform the correct
+* formatting and parsing for the locale.
+* @param localeIndex The index of the desired locale.
+* @return A locale for which date/time formatting patterns are available, or 0 if none.
+* @see udat_countAvailable
+* @stable ICU 2.0
+*/
+U_CAPI const char* U_EXPORT2
+udat_getAvailable(int32_t localeIndex);
+
+/**
+* Determine how many locales have date/time formatting patterns available.
+* This function is most useful for determining the loop ending condition for
+* calls to {@link #udat_getAvailable }.
+* @return The number of locales for which date/time formatting patterns are available.
+* @see udat_getAvailable
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+udat_countAvailable(void);
+
+/**
+* Get the year relative to which all 2-digit years are interpreted.
+* For example, if the 2-digit start year is 2100, the year 99 will be
+* interpreted as 2199.
+* @param fmt The formatter to query.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The year relative to which all 2-digit years are interpreted.
+* @see udat_set2DigitYearStart
+* @stable ICU 2.0
+*/
+U_CAPI UDate U_EXPORT2
+udat_get2DigitYearStart( const UDateFormat *fmt,
+ UErrorCode *status);
+
+/**
+* Set the year relative to which all 2-digit years will be interpreted.
+* For example, if the 2-digit start year is 2100, the year 99 will be
+* interpreted as 2199.
+* @param fmt The formatter to set.
+* @param d The year relative to which all 2-digit years will be interpreted.
+* @param status A pointer to an UErrorCode to receive any errors
+* @see udat_get2DigitYearStart
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+udat_set2DigitYearStart( UDateFormat *fmt,
+ UDate d,
+ UErrorCode *status);
+
+/**
+* Extract the pattern from a UDateFormat.
+* The pattern will follow the pattern syntax rules.
+* @param fmt The formatter to query.
+* @param localized true if the pattern should be localized, false otherwise.
+* @param result A pointer to a buffer to receive the pattern.
+* @param resultLength The maximum size of result.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see udat_applyPattern
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+udat_toPattern( const UDateFormat *fmt,
+ UBool localized,
+ UChar *result,
+ int32_t resultLength,
+ UErrorCode *status);
+
+/**
+* Set the pattern used by an UDateFormat.
+* The pattern should follow the pattern syntax rules.
+* @param format The formatter to set.
+* @param localized true if the pattern is localized, false otherwise.
+* @param pattern The new pattern
+* @param patternLength The length of pattern, or -1 if null-terminated.
+* @see udat_toPattern
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+udat_applyPattern( UDateFormat *format,
+ UBool localized,
+ const UChar *pattern,
+ int32_t patternLength);
+
+/**
+ * The possible types of date format symbols
+ * @stable ICU 2.6
+ */
+typedef enum UDateFormatSymbolType {
+ /** The era names, for example AD */
+ UDAT_ERAS,
+ /** The month names, for example February */
+ UDAT_MONTHS,
+ /** The short month names, for example Feb. */
+ UDAT_SHORT_MONTHS,
+ /** The CLDR-style format "wide" weekday names, for example Monday */
+ UDAT_WEEKDAYS,
+ /**
+ * The CLDR-style format "abbreviated" (not "short") weekday names, for example "Mon."
+ * For the CLDR-style format "short" weekday names, use UDAT_SHORTER_WEEKDAYS.
+ */
+ UDAT_SHORT_WEEKDAYS,
+ /** The AM/PM names, for example AM */
+ UDAT_AM_PMS,
+ /** The localized characters */
+ UDAT_LOCALIZED_CHARS,
+ /** The long era names, for example Anno Domini */
+ UDAT_ERA_NAMES,
+ /** The narrow month names, for example F */
+ UDAT_NARROW_MONTHS,
+ /** The CLDR-style format "narrow" weekday names, for example "M" */
+ UDAT_NARROW_WEEKDAYS,
+ /** Standalone context versions of months */
+ UDAT_STANDALONE_MONTHS,
+ UDAT_STANDALONE_SHORT_MONTHS,
+ UDAT_STANDALONE_NARROW_MONTHS,
+ /** The CLDR-style stand-alone "wide" weekday names */
+ UDAT_STANDALONE_WEEKDAYS,
+ /**
+ * The CLDR-style stand-alone "abbreviated" (not "short") weekday names.
+ * For the CLDR-style stand-alone "short" weekday names, use UDAT_STANDALONE_SHORTER_WEEKDAYS.
+ */
+ UDAT_STANDALONE_SHORT_WEEKDAYS,
+ /** The CLDR-style stand-alone "narrow" weekday names */
+ UDAT_STANDALONE_NARROW_WEEKDAYS,
+ /** The quarters, for example 1st Quarter */
+ UDAT_QUARTERS,
+ /** The short quarter names, for example Q1 */
+ UDAT_SHORT_QUARTERS,
+ /** Standalone context versions of quarters */
+ UDAT_STANDALONE_QUARTERS,
+ UDAT_STANDALONE_SHORT_QUARTERS,
+ /**
+ * The CLDR-style short weekday names, e.g. "Su", "Mo", etc.
+ * These are named "SHORTER" to contrast with the constants using _SHORT_
+ * above, which actually get the CLDR-style *abbreviated* versions of the
+ * corresponding names.
+ * @stable ICU 51
+ */
+ UDAT_SHORTER_WEEKDAYS,
+ /**
+ * Standalone version of UDAT_SHORTER_WEEKDAYS.
+ * @stable ICU 51
+ */
+ UDAT_STANDALONE_SHORTER_WEEKDAYS,
+ /**
+ * Cyclic year names (only supported for some calendars, and only for FORMAT usage;
+ * udat_setSymbols not supported for UDAT_CYCLIC_YEARS_WIDE)
+ * @stable ICU 54
+ */
+ UDAT_CYCLIC_YEARS_WIDE,
+ /**
+ * Cyclic year names (only supported for some calendars, and only for FORMAT usage)
+ * @stable ICU 54
+ */
+ UDAT_CYCLIC_YEARS_ABBREVIATED,
+ /**
+ * Cyclic year names (only supported for some calendars, and only for FORMAT usage;
+ * udat_setSymbols not supported for UDAT_CYCLIC_YEARS_NARROW)
+ * @stable ICU 54
+ */
+ UDAT_CYCLIC_YEARS_NARROW,
+ /**
+ * Calendar zodiac names (only supported for some calendars, and only for FORMAT usage;
+ * udat_setSymbols not supported for UDAT_ZODIAC_NAMES_WIDE)
+ * @stable ICU 54
+ */
+ UDAT_ZODIAC_NAMES_WIDE,
+ /**
+ * Calendar zodiac names (only supported for some calendars, and only for FORMAT usage)
+ * @stable ICU 54
+ */
+ UDAT_ZODIAC_NAMES_ABBREVIATED,
+ /**
+ * Calendar zodiac names (only supported for some calendars, and only for FORMAT usage;
+ * udat_setSymbols not supported for UDAT_ZODIAC_NAMES_NARROW)
+ * @stable ICU 54
+ */
+ UDAT_ZODIAC_NAMES_NARROW,
+
+ /**
+ * The narrow quarter names, for example 1
+ * @stable ICU 70
+ */
+ UDAT_NARROW_QUARTERS,
+
+ /**
+ * The narrow standalone quarter names, for example 1
+ * @stable ICU 70
+ */
+ UDAT_STANDALONE_NARROW_QUARTERS
+} UDateFormatSymbolType;
+
+struct UDateFormatSymbols;
+/** Date format symbols.
+ * For usage in C programs.
+ * @stable ICU 2.6
+ */
+typedef struct UDateFormatSymbols UDateFormatSymbols;
+
+/**
+* Get the symbols associated with an UDateFormat.
+* The symbols are what a UDateFormat uses to represent locale-specific data,
+* for example month or day names.
+* @param fmt The formatter to query.
+* @param type The type of symbols to get. One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
+* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
+* @param symbolIndex The desired symbol of type type.
+* @param result A pointer to a buffer to receive the symbol.
+* @param resultLength The maximum size of result.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see udat_countSymbols
+* @see udat_setSymbols
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+udat_getSymbols(const UDateFormat *fmt,
+ UDateFormatSymbolType type,
+ int32_t symbolIndex,
+ UChar *result,
+ int32_t resultLength,
+ UErrorCode *status);
+
+/**
+* Count the number of particular symbols for an UDateFormat.
+* This function is most useful for determining the loop termination condition
+* for calls to {@link #udat_getSymbols }.
+* @param fmt The formatter to query.
+* @param type The type of symbols to count. One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
+* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
+* @return The number of symbols of type type.
+* @see udat_getSymbols
+* @see udat_setSymbols
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+udat_countSymbols( const UDateFormat *fmt,
+ UDateFormatSymbolType type);
+
+/**
+* Set the symbols associated with an UDateFormat.
+* The symbols are what a UDateFormat uses to represent locale-specific data,
+* for example month or day names.
+* @param format The formatter to set
+* @param type The type of symbols to set. One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS,
+* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS
+* @param symbolIndex The index of the symbol to set of type type.
+* @param value The new value
+* @param valueLength The length of value, or -1 if null-terminated
+* @param status A pointer to an UErrorCode to receive any errors
+* @see udat_getSymbols
+* @see udat_countSymbols
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+udat_setSymbols( UDateFormat *format,
+ UDateFormatSymbolType type,
+ int32_t symbolIndex,
+ UChar *value,
+ int32_t valueLength,
+ UErrorCode *status);
+
+/**
+ * Get the locale for this date format object.
+ * You can choose between valid and actual locale.
+ * @param fmt The formatter to get the locale from
+ * @param type type of the locale we're looking for (valid or actual)
+ * @param status error code for the operation
+ * @return the locale name
+ * @stable ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+udat_getLocaleByType(const UDateFormat *fmt,
+ ULocDataLocaleType type,
+ UErrorCode* status);
+
+/**
+ * Set a particular UDisplayContext value in the formatter, such as
+ * UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
+ * @param fmt The formatter for which to set a UDisplayContext value.
+ * @param value The UDisplayContext value to set.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @stable ICU 51
+ */
+U_CAPI void U_EXPORT2
+udat_setContext(UDateFormat* fmt, UDisplayContext value, UErrorCode* status);
+
+/**
+ * Get the formatter's UDisplayContext value for the specified UDisplayContextType,
+ * such as UDISPCTX_TYPE_CAPITALIZATION.
+ * @param fmt The formatter to query.
+ * @param type The UDisplayContextType whose value to return
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The UDisplayContext value for the specified type.
+ * @stable ICU 53
+ */
+U_CAPI UDisplayContext U_EXPORT2
+udat_getContext(const UDateFormat* fmt, UDisplayContextType type, UErrorCode* status);
+
+#ifndef U_HIDE_INTERNAL_API
+
+
+/**
+ * @internal
+ * @see udat_open
+ */
+typedef UDateFormat* (U_EXPORT2 *UDateFormatOpener) (UDateFormatStyle timeStyle,
+ UDateFormatStyle dateStyle,
+ const char *locale,
+ const UChar *tzID,
+ int32_t tzIDLength,
+ const UChar *pattern,
+ int32_t patternLength,
+ UErrorCode *status);
+
+
+#endif /* U_HIDE_INTERNAL_API */
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/udisplaycontext.h b/third_party/icu4c/ndk_headers/unicode/udisplaycontext.h
new file mode 100644
index 00000000000..dbce02697bf
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/udisplaycontext.h
@@ -0,0 +1,173 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*****************************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*****************************************************************************************
+*/
+
+#ifndef UDISPLAYCONTEXT_H
+#define UDISPLAYCONTEXT_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * \file
+ * \brief C API: Display context types (enum values)
+ */
+
+/**
+ * Display context types, for getting values of a particular setting.
+ * Note, the specific numeric values are internal and may change.
+ * @stable ICU 51
+ */
+enum UDisplayContextType {
+ /**
+ * Type to retrieve the dialect handling setting, e.g.
+ * UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.
+ * @stable ICU 51
+ */
+ UDISPCTX_TYPE_DIALECT_HANDLING = 0,
+ /**
+ * Type to retrieve the capitalization context setting, e.g.
+ * UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.
+ * @stable ICU 51
+ */
+ UDISPCTX_TYPE_CAPITALIZATION = 1,
+ /**
+ * Type to retrieve the display length setting, e.g.
+ * UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.
+ * @stable ICU 54
+ */
+ UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
+ /**
+ * Type to retrieve the substitute handling setting, e.g.
+ * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
+ * @stable ICU 58
+ */
+ UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
+};
+/**
+* @stable ICU 51
+*/
+typedef enum UDisplayContextType UDisplayContextType;
+
+/**
+ * Display context settings.
+ * Note, the specific numeric values are internal and may change.
+ * @stable ICU 51
+ */
+enum UDisplayContext {
+ /**
+ * ================================
+ * DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or
+ * UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING
+ * to get the value.
+ */
+ /**
+ * A possible setting for DIALECT_HANDLING:
+ * use standard names when generating a locale name,
+ * e.g. en_GB displays as 'English (United Kingdom)'.
+ * @stable ICU 51
+ */
+ UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0,
+ /**
+ * A possible setting for DIALECT_HANDLING:
+ * use dialect names, when generating a locale name,
+ * e.g. en_GB displays as 'British English'.
+ * @stable ICU 51
+ */
+ UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1,
+ /**
+ * ================================
+ * CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE,
+ * UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or
+ * UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
+ * Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value.
+ */
+ /**
+ * The capitalization context to be used is unknown (this is the default value).
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for the middle of a sentence.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for the beginning of a sentence.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for a user-interface list or menu item.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for stand-alone usage such as an
+ * isolated name on a calendar page.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4,
+ /**
+ * ================================
+ * DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or
+ * UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH
+ * to get the value.
+ */
+ /**
+ * A possible setting for DISPLAY_LENGTH:
+ * use full names when generating a locale name,
+ * e.g. "United States" for US.
+ * @stable ICU 54
+ */
+ UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0,
+ /**
+ * A possible setting for DISPLAY_LENGTH:
+ * use short names when generating a locale name,
+ * e.g. "U.S." for US.
+ * @stable ICU 54
+ */
+ UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
+ /**
+ * ================================
+ * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
+ * UDISPCTX_NO_SUBSTITUTE. Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING
+ * to get the value.
+ */
+ /**
+ * A possible setting for SUBSTITUTE_HANDLING:
+ * Returns a fallback value (e.g., the input code) when no data is available.
+ * This is the default value.
+ * @stable ICU 58
+ */
+ UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
+ /**
+ * A possible setting for SUBSTITUTE_HANDLING:
+ * Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no
+ * data is available.
+ * @stable ICU 58
+ */
+ UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
+
+};
+/**
+* @stable ICU 51
+*/
+typedef enum UDisplayContext UDisplayContext;
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/ufieldpositer.h b/third_party/icu4c/ndk_headers/unicode/ufieldpositer.h
new file mode 100644
index 00000000000..b50b2c0cf5a
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/ufieldpositer.h
@@ -0,0 +1,123 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*****************************************************************************************
+* Copyright (C) 2015-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*****************************************************************************************
+*/
+
+#ifndef UFIELDPOSITER_H
+#define UFIELDPOSITER_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: UFieldPositionIterator for use with format APIs.
+ *
+ * Usage:
+ * ufieldpositer_open creates an empty (unset) UFieldPositionIterator.
+ * This can be passed to format functions such as {@link #udat_formatForFields},
+ * which will set it to apply to the fields in a particular formatted string.
+ * ufieldpositer_next can then be used to iterate over those fields,
+ * providing for each field its type (using values that are specific to the
+ * particular format type, such as date or number formats), as well as the
+ * start and end positions of the field in the formatted string.
+ * A given UFieldPositionIterator can be re-used for different format calls;
+ * each such call resets it to apply to that format string.
+ * ufieldpositer_close should be called to dispose of the UFieldPositionIterator
+ * when it is no longer needed.
+ *
+ * @see FieldPositionIterator
+ */
+
+/**
+ * Opaque UFieldPositionIterator object for use in C.
+ * @stable ICU 55
+ */
+struct UFieldPositionIterator;
+typedef struct UFieldPositionIterator UFieldPositionIterator; /**< C typedef for struct UFieldPositionIterator. @stable ICU 55 */
+
+/**
+ * Open a new, unset UFieldPositionIterator object.
+ * @param status
+ * A pointer to a UErrorCode to receive any errors.
+ * @return
+ * A pointer to an empty (unset) UFieldPositionIterator object,
+ * or NULL if an error occurred.
+ * @stable ICU 55
+ */
+U_CAPI UFieldPositionIterator* U_EXPORT2
+ufieldpositer_open(UErrorCode* status);
+
+/**
+ * Close a UFieldPositionIterator object. Once closed it may no longer be used.
+ * @param fpositer
+ * A pointer to the UFieldPositionIterator object to close.
+ * @stable ICU 55
+ */
+U_CAPI void U_EXPORT2
+ufieldpositer_close(UFieldPositionIterator *fpositer);
+
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUFieldPositionIteratorPointer
+ * "Smart pointer" class, closes a UFieldPositionIterator via ufieldpositer_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 55
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUFieldPositionIteratorPointer, UFieldPositionIterator, ufieldpositer_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Get information for the next field in the formatted string to which this
+ * UFieldPositionIterator currently applies, or return a negative value if there
+ * are no more fields.
+ * @param fpositer
+ * A pointer to the UFieldPositionIterator object containing iteration
+ * state for the format fields.
+ * @param beginIndex
+ * A pointer to an int32_t to receive information about the start offset
+ * of the field in the formatted string (undefined if the function
+ * returns a negative value). May be NULL if this information is not needed.
+ * @param endIndex
+ * A pointer to an int32_t to receive information about the end offset
+ * of the field in the formatted string (undefined if the function
+ * returns a negative value). May be NULL if this information is not needed.
+ * @return
+ * The field type (non-negative value), or a negative value if there are
+ * no more fields for which to provide information. If negative, then any
+ * values pointed to by beginIndex and endIndex are undefined.
+ *
+ * The values for field type depend on what type of formatter the
+ * UFieldPositionIterator has been set by; for a date formatter, the
+ * values from the UDateFormatField enum. For more information, see the
+ * descriptions of format functions that take a UFieldPositionIterator*
+ * parameter, such as {@link #udat_formatForFields}.
+ *
+ * @stable ICU 55
+ */
+U_CAPI int32_t U_EXPORT2
+ufieldpositer_next(UFieldPositionIterator *fpositer,
+ int32_t *beginIndex, int32_t *endIndex);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/uloc.h b/third_party/icu4c/ndk_headers/unicode/uloc.h
new file mode 100644
index 00000000000..365a627287c
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/uloc.h
@@ -0,0 +1,1380 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ULOC.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 08/22/98 stephen JDK 1.2 sync.
+* 12/08/98 rtg New C API for Locale
+* 03/30/99 damiba overhaul
+* 03/31/99 helena Javadoc for uloc functions.
+* 04/15/99 Madhu Updated Javadoc
+********************************************************************************
+*/
+
+#ifndef ULOC_H
+#define ULOC_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Locale ID functionality similar to C++ class Locale
+ *
+ *
+ * You create a locale ID with one of three options; the first is a language code alone.
+ * The second option includes an additional ISO Country
+ * Code. These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ *
+ * The third option requires another additional information--the
+ * Variant.
+ * The Variant codes are vendor and browser-specific.
+ * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_WIN".
+ *
+ *
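+ * Because a locale ID is just an identifier, specifying one does not guarantee that data exists for it.
+ * The ULOC_* constants defined in this header (for example ULOC_US) name commonly used locales.
+ * Once you've specified a locale you can query it for information about
+ * itself. Use uloc_getLanguage(), uloc_getCountry() and the related functions declared below.
+ * The ICU provides a number of services that perform locale-sensitive
+ * operations. For example, the unum_* functions format numbers for a given locale.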
+ * Each international service that performs locale-sensitive operations
+ * allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * uloc_getAvailable(), uloc_countAvailable() and uloc_getDisplayName().
+ * Concerning POSIX/RFC1766 Locale IDs,
+ * the getLanguage/getCountry/getVariant/getName functions do understand
+ * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT
+ * and if there is not an ICU-style variant, uloc_getVariant() for example
+ * will return the one listed after the \@at sign. As well, the hyphen
+ * "-" is recognized as a country/variant separator similarly to RFC1766.
+ * So for example, "en-us" will be interpreted as en_US.
+ * As a result, uloc_getName() is far from a no-op, and will have the
+ * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
+ * NOT map any of the actual codes (i.e. russian->ru) in any way.
+ * Applications should call uloc_getName() at the point where a locale ID
+ * is coming from an external source (user entry, OS, web browser)
+ * and pass the resulting string to other ICU functions. For example,
+ * don't use de-de\@EURO as an argument to resourcebundle.
+ *
+ * @see UResourceBundle
+ */
+
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_CHINESE "zh"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ENGLISH "en"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_FRENCH "fr"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_GERMAN "de"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ITALIAN "it"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_JAPANESE "ja"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_KOREAN "ko"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_TRADITIONAL_CHINESE "zh_TW"
+
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA "en_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA_FRENCH "fr_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CHINA "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_PRC "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_FRANCE "fr_FR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_GERMANY "de_DE"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_ITALY "it_IT"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_JAPAN "ja_JP"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_KOREA "ko_KR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_TAIWAN "zh_TW"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_UK "en_GB"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_US "en_US"
+
+/**
+ * Useful constant for the maximum size of the language part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_LANG_CAPACITY 12
+
+/**
+ * Useful constant for the maximum size of the country part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_COUNTRY_CAPACITY 4
+/**
+ * Useful constant for the maximum size of the whole locale ID
+ * (including the terminating NULL and all keywords).
+ * @stable ICU 2.0
+ */
+#define ULOC_FULLNAME_CAPACITY 157
+
+/**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 96
+
+/**
+ * Useful constant for the maximum total size of keywords and their values in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
+
+/**
+ * Invariant character separating keywords from the locale string
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+
+/**
+ * Unicode code point for '@' separating keywords from the locale string.
+ * @see ULOC_KEYWORD_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40
+
+/**
+ * Invariant character for assigning value to a keyword
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+
+/**
+ * Unicode code point for '=' for assigning value to a keyword.
+ * @see ULOC_KEYWORD_ASSIGN
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D
+
+/**
+ * Invariant character separating keywords
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+/**
+ * Unicode code point for ';' separating keywords
+ * @see ULOC_KEYWORD_ITEM_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B
+
+/**
+ * Constants for *_getLocale().
+ * Allows the caller to select whether information on the
+ * valid or the actual locale is wanted.
+ * For example, a collator for "en_US_CALIFORNIA" was
+ * requested. In the current state of ICU (2.0),
+ * the requested locale is "en_US_CALIFORNIA",
+ * the valid locale is "en_US" (most specific locale supported by ICU)
+ * and the actual locale is "root" (the collation data comes unmodified
+ * from the UCA).
+ * The locale is considered supported by ICU if there is a core ICU bundle
+ * for that locale (although it may be empty).
+ * @stable ICU 2.1
+ */
+typedef enum {
+ /** This is the locale the data actually comes from
+ * @stable ICU 2.1
+ */
+ ULOC_ACTUAL_LOCALE = 0,
+ /** This is the most specific locale supported by ICU
+ * @stable ICU 2.1
+ */
+ ULOC_VALID_LOCALE = 1,
+} ULocDataLocaleType;
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * Gets ICU's default locale.
+ * The returned string is a snapshot in time, and will remain valid
+ * and unchanged even when uloc_setDefault() is called.
+ * The returned storage is owned by ICU, and must not be altered or deleted
+ * by the caller.
+ *
+ * @return the ICU default locale
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getDefault(void);
+
+/**
+ * Sets ICU's default locale.
+ * By default (without calling this function), ICU's default locale will be based
+ * on information obtained from the underlying system environment.
+ *
+ * Changes to ICU's default locale do not propagate back to the
+ * system environment.
+ *
+ * Changes to ICU's default locale do not affect any ICU services that
+ * may already be open based on the previous default locale value.
+ *
+ * @param localeID the new ICU default locale. A value of NULL will try to get
+ * the system's default locale.
+ * @param status the error information if the setting of default locale fails
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+uloc_setDefault(const char* localeID,
+ UErrorCode* status);
+#endif /* U_HIDE_SYSTEM_API */
+
+/**
+ * Gets the language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
+ * than languageCapacity, the returned language code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getLanguage(const char* localeID,
+ char* language,
+ int32_t languageCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the script code with
+ * @param script the script code for localeID
+ * @param scriptCapacity the size of the script buffer to store the
+ * script code with
+ * @param err error information if retrieving the script code failed
+ * @return the actual buffer size needed for the script code. If it's greater
+ * than scriptCapacity, the returned script code will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getScript(const char* localeID,
+ char* script,
+ int32_t scriptCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the country code for the specified locale.
+ *
+ * @param localeID the locale to get the country code with
+ * @param country the country code for localeID
+ * @param countryCapacity the size of the country buffer to store the
+ * country code with
+ * @param err error information if retrieving the country code failed
+ * @return the actual buffer size needed for the country code. If it's greater
+ * than countryCapacity, the returned country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getCountry(const char* localeID,
+ char* country,
+ int32_t countryCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the variant code for the specified locale.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param variant the variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the
+ * variant code with
+ * @param err error information if retrieving the variant code failed
+ * @return the actual buffer size needed for the variant code. If it's greater
+ * than variantCapacity, the returned variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getVariant(const char* localeID,
+ char* variant,
+ int32_t variantCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name the full name for localeID
+ * @param nameCapacity the size of the name buffer to store the
+ * full name with
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_canonicalize(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the ISO language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @return language the ISO language code for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getISO3Language(const char* localeID);
+
+
+/**
+ * Gets the ISO country code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO country code with
+ * @return country the ISO country code for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getISO3Country(const char* localeID);
+
+/**
+ * Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
+ *
+ * LCIDs were deprecated with Windows Vista and Microsoft recommends
+ * that developers use BCP47 style tags instead (uloc_toLanguageTag).
+ *
+ * @param localeID the locale to get the Win32 LCID value with
+ * @return the Win32 LCID for localeID, or 0 if the locale is not recognized
+ * @stable ICU 2.0
+ */
+U_CAPI uint32_t U_EXPORT2
+uloc_getLCID(const char* localeID);
+
+/**
+ * Gets the language name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable language code with
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * ULOC_FRENCH for displayLocale would result in "Anglais",
+ * while passing ULOC_GERMAN for displayLocale would result
+ * in "Englisch".
+ * @param language the displayable language code for locale
+ * @param languageCapacity the size of the language buffer to store the
+ * displayable language code with.
+ * @param status error information if retrieving the displayable language code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * language code is placed into language as fallback.
+ * @return the actual buffer size needed for the displayable language code. If
+ * it's greater than languageCapacity, the returned language
+ * code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char* locale,
+ const char* displayLocale,
+ UChar* language,
+ int32_t languageCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be
+ * used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, the script of locale is named in the language of
+ * displayLocale: passing ULOC_FRENCH yields the French name of the
+ * script, while passing ULOC_GERMAN yields the German name.
+ * NULL may be used to specify the default.
+ * @param script the displayable script for locale.
+ * @param scriptCapacity the size of the script buffer to store the displayable
+ * script code with.
+ * @param status error information if retrieving the displayable script code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * script code is placed into script as fallback.
+ * @return the actual buffer size needed for the displayable script code. If
+ * it's greater than scriptCapacity, the returned displayable
+ * script code will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+ const char* displayLocale,
+ UChar* script,
+ int32_t scriptCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the country name suitable for display for the specified locale.
+ * Warning: this is for the region part of a valid locale ID; it cannot just be
+ * the region code (like "FR"). To get the display name for a region alone, or
+ * for other options, use ULocaleDisplayNames instead.
+ *
+ * @param locale the locale to get the displayable country code with. NULL may
+ * be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, the country of locale is named in the language of
+ * displayLocale: passing ULOC_FRENCH yields the French name of the
+ * country, while passing ULOC_GERMAN yields the German name.
+ * NULL may be used to specify the default.
+ * @param country the displayable country code for locale.
+ * @param countryCapacity the size of the country buffer to store the
+ * displayable country code with.
+ * @param status error information if retrieving the displayable country code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * country code is placed into country as fallback.
+ * @return the actual buffer size needed for the displayable country code. If
+ * it's greater than countryCapacity, the returned displayable
+ * country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayCountry(const char* locale,
+ const char* displayLocale,
+ UChar* country,
+ int32_t countryCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Gets the variant name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable variant code with. NULL may
+ * be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, the variant of locale is named in the language of
+ * displayLocale: passing ULOC_FRENCH yields the French name of the
+ * variant, while passing ULOC_GERMAN yields the German name.
+ * NULL may be used to specify the default.
+ * @param variant the displayable variant code for locale.
+ * @param variantCapacity the size of the variant buffer to store the
+ * displayable variant code with.
+ * @param status error information if retrieving the displayable variant code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * variant code is placed into variant as fallback.
+ * @return the actual buffer size needed for the displayable variant code. If
+ * it's greater than variantCapacity, the returned displayable
+ * variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayVariant(const char* locale,
+ const char* displayLocale,
+ UChar* variant,
+ int32_t variantCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the keyword name suitable for display for the specified locale. E.g:
+ * for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for the keyword collation.
+ * Usage:
+ *
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ *
+ * @param langtag the input BCP47 language tag.
+ * @param localeID the output buffer receiving a locale ID for the
+ * specified BCP47 language tag.
+ * @param localeIDCapacity the size of the locale ID output buffer.
+ * @param parsedLength if not NULL, successfully parsed length
+ * for the input language tag is set.
+ * @param err error information if receiving the locale ID
+ * failed.
+ * @return the length of the locale ID.
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_forLanguageTag(const char* langtag,
+ char* localeID,
+ int32_t localeIDCapacity,
+ int32_t* parsedLength,
+ UErrorCode* err);
+
+/**
+ * Returns a well-formed language tag for this locale ID.
+ *
+ * Note: When
+ * When the specified keyword is unknown, but satisfies the BCP syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ *
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax, then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ *
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy key, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * IMPORTANT: New users are strongly encouraged to
+ * see if unumberformatter.h fits their use case. Although not deprecated,
+ * this header is provided for backwards compatibility only.
+ *
+ * Number Format C API Provides functions for
+ * formatting and parsing a number. Also provides methods for
+ * determining which locales have number formats, and what their names
+ * are.
+ *
+ * UNumberFormat helps you to format and parse numbers for any locale.
+ * Your code can be completely independent of the locale conventions
+ * for decimal points, thousands-separators, or even the particular
+ * decimal digits used, or whether the number format is even decimal.
+ * There are different number format styles like decimal, currency,
+ * percent and spellout.
+ *
+ * To format a number for the current Locale, use one of the static
+ * factory methods:
+ *
+ * Use a pattern to create either a DecimalFormat or a RuleBasedNumberFormat
+ * formatter. The pattern must conform to the syntax defined for those
+ * formatters.
+ *
+ * You can also control the display of numbers with such functions as
+ * unum_getAttribute() and unum_setAttribute(), which let you set the
+ * minimum fraction digits, grouping, etc.
+ * @see UNumberFormatAttributes for more details
+ *
+ * You can also use forms of the parse and format methods with
+ * ParsePosition and UFieldPosition to allow you to:
+ *
+ * It is also possible to change or set the symbols used for a particular
+ * locale, like the currency symbol, the grouping separator, the monetary separator,
+ * etc., by making use of the functions unum_setSymbol() and unum_getSymbol().
+ */
+
+/** A number formatter.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef void* UNumberFormat;
+
+/** The possible number format styles.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatStyle {
+ /**
+ * Decimal format defined by a pattern string.
+ * @stable ICU 3.0
+ */
+ UNUM_PATTERN_DECIMAL=0,
+ /**
+ * Decimal format ("normal" style).
+ * @stable ICU 2.0
+ */
+ UNUM_DECIMAL=1,
+ /**
+ * Currency format (generic).
+ * Defaults to UNUM_CURRENCY_STANDARD style
+ * (using currency symbol, e.g., "$1.00", with non-accounting
+ * style for negative values e.g. using minus sign).
+ * The specific style may be specified using the -cf- locale key.
+ * @stable ICU 2.0
+ */
+ UNUM_CURRENCY=2,
+ /**
+ * Percent format
+ * @stable ICU 2.0
+ */
+ UNUM_PERCENT=3,
+ /**
+ * Scientific format
+ * @stable ICU 2.1
+ */
+ UNUM_SCIENTIFIC=4,
+ /**
+ * Spellout rule-based format. The default ruleset can be specified/changed using
+ * unum_setTextAttribute with UNUM_DEFAULT_RULESET; the available public rulesets
+ * can be listed using unum_getTextAttribute with UNUM_PUBLIC_RULESETS.
+ * @stable ICU 2.0
+ */
+ UNUM_SPELLOUT=5,
+ /**
+ * Ordinal rule-based format. The default ruleset can be specified/changed using
+ * unum_setTextAttribute with UNUM_DEFAULT_RULESET; the available public rulesets
+ * can be listed using unum_getTextAttribute with UNUM_PUBLIC_RULESETS.
+ * @stable ICU 3.0
+ */
+ UNUM_ORDINAL=6,
+ /**
+ * Duration rule-based format
+ * @stable ICU 3.0
+ */
+ UNUM_DURATION=7,
+ /**
+ * Numbering system rule-based format
+ * @stable ICU 4.2
+ */
+ UNUM_NUMBERING_SYSTEM=8,
+ /**
+ * Rule-based format defined by a pattern string.
+ * @stable ICU 3.0
+ */
+ UNUM_PATTERN_RULEBASED=9,
+ /**
+ * Currency format with an ISO currency code, e.g., "USD1.00".
+ * @stable ICU 4.8
+ */
+ UNUM_CURRENCY_ISO=10,
+ /**
+ * Currency format with a pluralized currency name,
+ * e.g., "1.00 US dollar" and "3.00 US dollars".
+ * @stable ICU 4.8
+ */
+ UNUM_CURRENCY_PLURAL=11,
+ /**
+ * Currency format for accounting, e.g., "($3.00)" for
+ * negative currency amount instead of "-$3.00" ({@link #UNUM_CURRENCY}).
+ * Overrides any style specified using -cf- key in locale.
+ * @stable ICU 53
+ */
+ UNUM_CURRENCY_ACCOUNTING=12,
+ /**
+ * Currency format with a currency symbol given CASH usage, e.g.,
+ * "NT$3" instead of "NT$3.23".
+ * @stable ICU 54
+ */
+ UNUM_CASH_CURRENCY=13,
+ /**
+ * Decimal format expressed using compact notation
+ * (short form, corresponds to UNumberCompactStyle=UNUM_SHORT)
+ * e.g. "23K", "45B"
+ * @stable ICU 56
+ */
+ UNUM_DECIMAL_COMPACT_SHORT=14,
+ /**
+ * Decimal format expressed using compact notation
+ * (long form, corresponds to UNumberCompactStyle=UNUM_LONG)
+ * e.g. "23 thousand", "45 billion"
+ * @stable ICU 56
+ */
+ UNUM_DECIMAL_COMPACT_LONG=15,
+ /**
+ * Currency format with a currency symbol, e.g., "$1.00",
+ * using non-accounting style for negative values (e.g. minus sign).
+ * Overrides any style specified using -cf- key in locale.
+ * @stable ICU 56
+ */
+ UNUM_CURRENCY_STANDARD=16,
+ /**
+ * Default format
+ * @stable ICU 2.0
+ */
+ UNUM_DEFAULT = UNUM_DECIMAL,
+ /**
+ * Alias for UNUM_PATTERN_DECIMAL
+ * @stable ICU 3.0
+ */
+ UNUM_IGNORE = UNUM_PATTERN_DECIMAL
+} UNumberFormatStyle;
+
+/** The possible number format rounding modes.
+ *
+ *
+ * For more detail on rounding modes, see:
+ * https://unicode-org.github.io/icu/userguide/format_parse/numbers/rounding-modes
+ *
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatRoundingMode {
+ UNUM_ROUND_CEILING,
+ UNUM_ROUND_FLOOR,
+ UNUM_ROUND_DOWN,
+ UNUM_ROUND_UP,
+ /**
+ * Half-even rounding
+ * @stable ICU 3.8
+ */
+ UNUM_ROUND_HALFEVEN,
+ UNUM_ROUND_HALFDOWN = UNUM_ROUND_HALFEVEN + 1,
+ UNUM_ROUND_HALFUP,
+ /**
+ * ROUND_UNNECESSARY reports an error if formatted result is not exact.
+ * @stable ICU 4.8
+ */
+ UNUM_ROUND_UNNECESSARY,
+ /**
+ * Rounds ties toward the odd number.
+ * @stable ICU 69
+ */
+ UNUM_ROUND_HALF_ODD,
+ /**
+ * Rounds ties toward +∞.
+ * @stable ICU 69
+ */
+ UNUM_ROUND_HALF_CEILING,
+ /**
+ * Rounds ties toward -∞.
+ * @stable ICU 69
+ */
+ UNUM_ROUND_HALF_FLOOR,
+} UNumberFormatRoundingMode;
+
+/** The possible number format pad positions.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatPadPosition {
+ UNUM_PAD_BEFORE_PREFIX,
+ UNUM_PAD_AFTER_PREFIX,
+ UNUM_PAD_BEFORE_SUFFIX,
+ UNUM_PAD_AFTER_SUFFIX
+} UNumberFormatPadPosition;
+
+/**
+ * Constants for specifying short or long format.
+ * @stable ICU 51
+ */
+typedef enum UNumberCompactStyle {
+ /** @stable ICU 51 */
+ UNUM_SHORT,
+ /** @stable ICU 51 */
+ UNUM_LONG
+ /** @stable ICU 51 */
+} UNumberCompactStyle;
+
+/**
+ * Constants for specifying currency spacing
+ * @stable ICU 4.8
+ */
+enum UCurrencySpacing {
+ /** @stable ICU 4.8 */
+ UNUM_CURRENCY_MATCH,
+ /** @stable ICU 4.8 */
+ UNUM_CURRENCY_SURROUNDING_MATCH,
+ /** @stable ICU 4.8 */
+ UNUM_CURRENCY_INSERT,
+};
+typedef enum UCurrencySpacing UCurrencySpacing; /**< @stable ICU 4.8 */
+
+
+/**
+ * FieldPosition and UFieldPosition selectors for format fields
+ * defined by NumberFormat and UNumberFormat.
+ * @stable ICU 49
+ */
+typedef enum UNumberFormatFields {
+ /** @stable ICU 49 */
+ UNUM_INTEGER_FIELD,
+ /** @stable ICU 49 */
+ UNUM_FRACTION_FIELD,
+ /** @stable ICU 49 */
+ UNUM_DECIMAL_SEPARATOR_FIELD,
+ /** @stable ICU 49 */
+ UNUM_EXPONENT_SYMBOL_FIELD,
+ /** @stable ICU 49 */
+ UNUM_EXPONENT_SIGN_FIELD,
+ /** @stable ICU 49 */
+ UNUM_EXPONENT_FIELD,
+ /** @stable ICU 49 */
+ UNUM_GROUPING_SEPARATOR_FIELD,
+ /** @stable ICU 49 */
+ UNUM_CURRENCY_FIELD,
+ /** @stable ICU 49 */
+ UNUM_PERCENT_FIELD,
+ /** @stable ICU 49 */
+ UNUM_PERMILL_FIELD,
+ /** @stable ICU 49 */
+ UNUM_SIGN_FIELD,
+ /** @stable ICU 64 */
+ UNUM_MEASURE_UNIT_FIELD,
+ /** @stable ICU 64 */
+ UNUM_COMPACT_FIELD,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Count sentinel for this enum. When draft API is visible the value is
+ * UNUM_COMPACT_FIELD + 2, leaving room for the draft
+ * UNUM_APPROXIMATELY_SIGN_FIELD selector, which is not declared in this header.
+ * @deprecated The numeric value depends on the configuration; do not rely on it.
+ */
+#ifndef U_HIDE_DRAFT_API
+ UNUM_FIELD_COUNT = UNUM_COMPACT_FIELD + 2
+#else // U_HIDE_DRAFT_API (for UNUM_APPROXIMATELY_SIGN_FIELD)
+ UNUM_FIELD_COUNT = UNUM_COMPACT_FIELD + 1
+#endif // U_HIDE_DRAFT_API (for UNUM_APPROXIMATELY_SIGN_FIELD)
+#endif /* U_HIDE_DEPRECATED_API */
+} UNumberFormatFields;
+
+
+/**
+ * Selectors with special numeric values to use locale default minimum grouping
+ * digits for the DecimalFormat/UNumberFormat setMinimumGroupingDigits method.
+ * Do not use these constants with the [U]NumberFormatter API.
+ *
+ * @stable ICU 68
+ */
+typedef enum UNumberFormatMinimumGroupingDigits {
+ /**
+ * Display grouping using the default strategy for all locales.
+ * @stable ICU 68
+ */
+ UNUM_MINIMUM_GROUPING_DIGITS_AUTO = -2,
+ /**
+ * Display grouping using locale defaults, except do not show grouping on
+ * values smaller than 10000 (such that there is a minimum of two digits
+ * before the first separator).
+ * @stable ICU 68
+ */
+ UNUM_MINIMUM_GROUPING_DIGITS_MIN2 = -3,
+} UNumberFormatMinimumGroupingDigits;
+
+/**
+ * Create and return a new UNumberFormat for formatting and parsing
+ * numbers. A UNumberFormat may be used to format numbers by calling
+ * {@link #unum_format }, and to parse numbers by calling {@link #unum_parse }.
+ * The caller must call {@link #unum_close } when done to release resources
+ * used by this object.
+ * @param style The type of number format to open: one of
+ * UNUM_DECIMAL, UNUM_CURRENCY, UNUM_PERCENT, UNUM_SCIENTIFIC,
+ * UNUM_CURRENCY_ISO, UNUM_CURRENCY_PLURAL, UNUM_SPELLOUT,
+ * UNUM_ORDINAL, UNUM_DURATION, UNUM_NUMBERING_SYSTEM,
+ * UNUM_PATTERN_DECIMAL, UNUM_PATTERN_RULEBASED, or UNUM_DEFAULT.
+ * If UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED is passed then the
+ * number format is opened using the given pattern, which must conform
+ * to the syntax described in DecimalFormat or RuleBasedNumberFormat,
+ * respectively.
+ *
+ * NOTE: New users are strongly encouraged to
+ * use unumf_openForSkeletonAndLocale instead of unum_open.
+ *
+ * @param pattern A pattern specifying the format to use.
+ * This parameter is ignored unless the style is
+ * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
+ * @param patternLength The number of characters in the pattern, or -1
+ * if null-terminated. This parameter is ignored unless the style is
+ * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
+ * @param locale A locale identifier to use to determine formatting
+ * and parsing conventions, or NULL to use the default locale.
+ * @param parseErr A pointer to a UParseError struct to receive the
+ * details of any parsing errors, or NULL if no parsing error details
+ * are desired.
+ * @param status A pointer to an input-output UErrorCode.
+ * @return A pointer to a newly created UNumberFormat, or NULL if an
+ * error occurred.
+ * @see unum_close
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_CAPI UNumberFormat* U_EXPORT2
+unum_open( UNumberFormatStyle style,
+ const UChar* pattern,
+ int32_t patternLength,
+ const char* locale,
+ UParseError* parseErr,
+ UErrorCode* status);
+
+
+/**
+* Close a UNumberFormat.
+* Once closed, a UNumberFormat may no longer be used.
+* @param fmt The formatter to close.
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+unum_close(UNumberFormat* fmt);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUNumberFormatPointer
+ * "Smart pointer" class, closes a UNumberFormat via unum_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUNumberFormatPointer, UNumberFormat, unum_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Open a copy of a UNumberFormat.
+ * This function performs a deep copy.
+ * @param fmt The format to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UNumberFormat identical to fmt.
+ * @stable ICU 2.0
+ */
+U_CAPI UNumberFormat* U_EXPORT2
+unum_clone(const UNumberFormat *fmt,
+ UErrorCode *status);
+
+/**
+* Format an integer using a UNumberFormat.
+* The integer will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
+* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
+* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
+* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+* @param resultLength The maximum size of result.
+* @param pos A pointer to a UFieldPosition. On input, pos->field
+* is read. On output, pos->beginIndex and pos->endIndex indicate
+* the beginning and ending indices of field number pos->field, if such
+* a field exists. This parameter may be NULL, in which case it is ignored.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_format( const UNumberFormat* fmt,
+ int32_t number,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition *pos,
+ UErrorCode* status);
+
+/**
+* Format an int64 using a UNumberFormat.
+* The int64 will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
+* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
+* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
+* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+* @param resultLength The maximum size of result.
+* @param pos A pointer to a UFieldPosition. On input, pos->field
+* is read. On output, pos->beginIndex and pos->endIndex indicate
+* the beginning and ending indices of field number pos->field, if such
+* a field exists. This parameter may be NULL, in which case it is ignored.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format
+* @see unum_formatDouble
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_formatInt64(const UNumberFormat *fmt,
+ int64_t number,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition *pos,
+ UErrorCode* status);
+
+/**
+* Format a double using a UNumberFormat.
+* The double will be formatted according to the UNumberFormat's locale.
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
+* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
+* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
+* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+* @param resultLength The maximum size of result.
+* @param pos A pointer to a UFieldPosition. On input, pos->field
+* is read. On output, pos->beginIndex and pos->endIndex indicate
+* the beginning and ending indices of field number pos->field, if such
+* a field exists. This parameter may be NULL, in which case it is ignored.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_formatDouble( const UNumberFormat* fmt,
+ double number,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition *pos, /* 0 if ignore */
+ UErrorCode* status);
+
+/**
+* Format a double using a UNumberFormat according to the UNumberFormat's locale,
+* and initialize a UFieldPositionIterator that enumerates the subcomponents of
+* the resulting string.
+*
+* @param format
+* The formatter to use.
+* @param number
+* The number to format.
+* @param result
+* A pointer to a buffer to receive the NULL-terminated formatted
+* number. If the formatted number fits into result but cannot be
+* NULL-terminated (length == resultLength) then the error code is set
+* to U_STRING_NOT_TERMINATED_WARNING. If the formatted number doesn't
+* fit into result then the error code is set to
+* U_BUFFER_OVERFLOW_ERROR.
+* @param resultLength
+* The maximum size of result.
+* @param fpositer
+* A pointer to a UFieldPositionIterator created by {@link #ufieldpositer_open}
+* (may be NULL if field position information is not needed, but in this
+* case it's preferable to use {@link #unum_formatDouble}). Iteration
+* information already present in the UFieldPositionIterator is deleted,
+* and the iterator is reset to apply to the fields in the formatted
+* string created by this function call. The field values and indexes
+* returned by {@link #ufieldpositer_next} represent fields denoted by
+* the UNumberFormatFields enum. Fields are not returned in a guaranteed
+* order. Fields cannot overlap, but they may nest. For example, 1234
+* could format as "1,234" which might consist of a grouping separator
+* field for ',' and an integer field encompassing the entire string.
+* @param status
+* A pointer to an UErrorCode to receive any errors
+* @return
+* The total buffer size needed; if greater than resultLength, the
+* output was truncated.
+* @see unum_formatDouble
+* @see unum_parse
+* @see unum_parseDouble
+* @see UFieldPositionIterator
+* @see UNumberFormatFields
+* @stable ICU 59
+*/
+U_CAPI int32_t U_EXPORT2
+unum_formatDoubleForFields(const UNumberFormat* format,
+ double number,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPositionIterator* fpositer,
+ UErrorCode* status);
+
+
+/**
+* Format a decimal number using a UNumberFormat.
+* The number will be formatted according to the UNumberFormat's locale.
+* The syntax of the input number is a "numeric string"
+* as defined in the Decimal Arithmetic Specification, available at
+* http://speleotrove.com/decimal
+* @param fmt The formatter to use.
+* @param number The number to format.
+* @param length The length of the input number, or -1 if the input is nul-terminated.
+* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
+* the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
+* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
+* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+* @param resultLength The maximum size of result.
+* @param pos A pointer to a UFieldPosition. On input, pos->field
+* is read. On output, pos->beginIndex and pos->endIndex indicate
+* the beginning and ending indices of field number pos->field, if such
+* a field exists. This parameter may be NULL, in which case it is ignored.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see UFieldPosition
+* @stable ICU 4.4
+*/
+U_CAPI int32_t U_EXPORT2
+unum_formatDecimal( const UNumberFormat* fmt,
+ const char * number,
+ int32_t length,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition *pos, /* 0 if ignore */
+ UErrorCode* status);
+
+/**
+ * Format a double currency amount using a UNumberFormat.
+ * The double will be formatted according to the UNumberFormat's locale.
+ *
+ * To format an exact decimal value with a currency, use
+ * `unum_setTextAttribute(UNUM_CURRENCY_CODE, ...)` followed by unum_formatDecimal.
+ * Your UNumberFormat must be created with the UNUM_CURRENCY style. Alternatively,
+ * consider using unumf_openForSkeletonAndLocale.
+ *
+ * @param fmt the formatter to use
+ * @param number the number to format
+ * @param currency the 3-letter null-terminated ISO 4217 currency code
+ * @param result A pointer to a buffer to receive the NULL-terminated formatted number. If
+ * the formatted number fits into result but cannot be NULL-terminated (length == resultLength)
+ * then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number
+ * doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ * @param resultLength the maximum number of UChars to write to result
+ * @param pos a pointer to a UFieldPosition. On input,
+ * pos->field is read. On output, pos->beginIndex and
+ * pos->endIndex indicate the beginning and ending indices of
+ * field number pos->field, if such a field exists. This
+ * parameter may be NULL, in which case it is ignored.
+ * @param status a pointer to an input-output UErrorCode
+ * @return the total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @see unum_formatDouble
+ * @see unum_parseDoubleCurrency
+ * @see UFieldPosition
+ * @stable ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+unum_formatDoubleCurrency(const UNumberFormat* fmt,
+ double number,
+ UChar* currency,
+ UChar* result,
+ int32_t resultLength,
+ UFieldPosition* pos,
+ UErrorCode* status);
+
+/**
+* Parse a string into an integer using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
+* and UNUM_DECIMAL_COMPACT_LONG.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not NULL, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed integer
+* @see unum_parseInt64
+* @see unum_parseDouble
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_parse( const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos /* 0 = start */,
+ UErrorCode *status);
+
+/**
+* Parse a string into an int64 using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
+* and UNUM_DECIMAL_COMPACT_LONG.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not NULL, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed integer
+* @see unum_parse
+* @see unum_parseDouble
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.8
+*/
+U_CAPI int64_t U_EXPORT2
+unum_parseInt64(const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos /* 0 = start */,
+ UErrorCode *status);
+
+/**
+* Parse a string into a double using a UNumberFormat.
+* The string will be parsed according to the UNumberFormat's locale.
+* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
+* and UNUM_DECIMAL_COMPACT_LONG.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not NULL, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The value of the parsed double
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 2.0
+*/
+U_CAPI double U_EXPORT2
+unum_parseDouble( const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos /* 0 = start */,
+ UErrorCode *status);
+
+
+/**
+* Parse a number from a string into an unformatted numeric string using a UNumberFormat.
+* The input string will be parsed according to the UNumberFormat's locale.
+* The syntax of the output is a "numeric string"
+* as defined in the Decimal Arithmetic Specification, available at
+* http://speleotrove.com/decimal
+* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT
+* and UNUM_DECIMAL_COMPACT_LONG.
+* @param fmt The formatter to use.
+* @param text The text to parse.
+* @param textLength The length of text, or -1 if null-terminated.
+* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which
+* to begin parsing. If not NULL, on output the offset at which parsing ended.
+* @param outBuf A (char *) buffer to receive the parsed number as a string. The output string
+* will be nul-terminated if there is sufficient space.
+* @param outBufLength The size of the output buffer. May be zero, in which case
+* the outBuf pointer may be NULL, and the function will return the
+* size of the output string.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return the length of the output string, not including any terminating nul.
+* @see unum_parse
+* @see unum_parseInt64
+* @see unum_format
+* @see unum_formatInt64
+* @see unum_formatDouble
+* @stable ICU 4.4
+*/
+U_CAPI int32_t U_EXPORT2
+unum_parseDecimal(const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t *parsePos /* 0 = start */,
+ char *outBuf,
+ int32_t outBufLength,
+ UErrorCode *status);
+
+/**
+ * Parse a string into a double and a currency using a UNumberFormat.
+ * The string will be parsed according to the UNumberFormat's locale.
+ * @param fmt the formatter to use
+ * @param text the text to parse
+ * @param textLength the length of text, or -1 if null-terminated
+ * @param parsePos a pointer to an offset index into text at which to
+ * begin parsing. On output, *parsePos will point after the last
+ * parsed character. This parameter may be NULL, in which case parsing
+ * begins at offset 0.
+ * @param currency a pointer to the buffer to receive the parsed null-
+ * terminated currency. This buffer must have a capacity of at least
+ * 4 UChars.
+ * @param status a pointer to an input-output UErrorCode
+ * @return the parsed double
+ * @see unum_parseDouble
+ * @see unum_formatDoubleCurrency
+ * @stable ICU 3.0
+ */
+U_CAPI double U_EXPORT2
+unum_parseDoubleCurrency(const UNumberFormat* fmt,
+ const UChar* text,
+ int32_t textLength,
+ int32_t* parsePos, /* 0 = start */
+ UChar* currency,
+ UErrorCode* status);
+
+/**
+ * Set the pattern used by a UNumberFormat. This can only be used
+ * on a DecimalFormat, other formats return U_UNSUPPORTED_ERROR
+ * in the status.
+ * @param format The formatter to set.
+ * @param localized true if the pattern is localized, false otherwise.
+ * @param pattern The new pattern
+ * @param patternLength The length of pattern, or -1 if null-terminated.
+ * @param parseError A pointer to UParseError to receive information
+ * about errors that occurred during parsing, or NULL if no parse error
+ * information is desired.
+ * @param status A pointer to an input-output UErrorCode.
+ * @see unum_toPattern
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+unum_applyPattern( UNumberFormat *format,
+ UBool localized,
+ const UChar *pattern,
+ int32_t patternLength,
+ UParseError *parseError,
+ UErrorCode *status
+ );
+
+/**
+* Get a locale for which decimal formatting patterns are available.
+* A UNumberFormat in a locale returned by this function will perform the correct
+* formatting and parsing for the locale. The results of this call are not
+* valid for rule-based number formats.
+* @param localeIndex The index of the desired locale.
+* @return A locale for which number formatting patterns are available, or 0 if none.
+* @see unum_countAvailable
+* @stable ICU 2.0
+*/
+U_CAPI const char* U_EXPORT2
+unum_getAvailable(int32_t localeIndex);
+
+/**
+* Determine how many locales have decimal formatting patterns available. The
+* results of this call are not valid for rule-based number formats.
+* This function is useful for determining the loop ending condition for
+* calls to {@link #unum_getAvailable }.
+* @return The number of locales for which decimal formatting patterns are available.
+* @see unum_getAvailable
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_countAvailable(void);
+
+#if UCONFIG_HAVE_PARSEALLINPUT
+/* The UNumberFormatAttributeValue type cannot be #ifndef U_HIDE_INTERNAL_API, needed for .h variable declaration */
+/**
+ * @internal
+ */
+typedef enum UNumberFormatAttributeValue {
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ UNUM_NO = 0,
+ /** @internal */
+ UNUM_YES = 1,
+ /** @internal */
+ UNUM_MAYBE = 2
+#else
+ /** @internal */
+ UNUM_FORMAT_ATTRIBUTE_VALUE_HIDDEN
+#endif /* U_HIDE_INTERNAL_API */
+} UNumberFormatAttributeValue;
+#endif
+
+/** The possible UNumberFormat numeric attributes @stable ICU 2.0 */
+typedef enum UNumberFormatAttribute {
+ /** Parse integers only */
+ UNUM_PARSE_INT_ONLY,
+ /** Use grouping separator */
+ UNUM_GROUPING_USED,
+ /** Always show decimal point */
+ UNUM_DECIMAL_ALWAYS_SHOWN,
+ /** Maximum integer digits */
+ UNUM_MAX_INTEGER_DIGITS,
+ /** Minimum integer digits */
+ UNUM_MIN_INTEGER_DIGITS,
+ /** Integer digits */
+ UNUM_INTEGER_DIGITS,
+ /** Maximum fraction digits */
+ UNUM_MAX_FRACTION_DIGITS,
+ /** Minimum fraction digits */
+ UNUM_MIN_FRACTION_DIGITS,
+ /** Fraction digits */
+ UNUM_FRACTION_DIGITS,
+ /** Multiplier */
+ UNUM_MULTIPLIER,
+ /** Grouping size */
+ UNUM_GROUPING_SIZE,
+ /** Rounding Mode */
+ UNUM_ROUNDING_MODE,
+ /** Rounding increment */
+ UNUM_ROUNDING_INCREMENT,
+ /** The width to which the output of format() is padded. */
+ UNUM_FORMAT_WIDTH,
+ /** The position at which padding will take place. */
+ UNUM_PADDING_POSITION,
+ /** Secondary grouping size */
+ UNUM_SECONDARY_GROUPING_SIZE,
+ /** Use significant digits
+ * @stable ICU 3.0 */
+ UNUM_SIGNIFICANT_DIGITS_USED,
+ /** Minimum significant digits
+ * @stable ICU 3.0 */
+ UNUM_MIN_SIGNIFICANT_DIGITS,
+ /** Maximum significant digits
+ * @stable ICU 3.0 */
+ UNUM_MAX_SIGNIFICANT_DIGITS,
+ /** Lenient parse mode used by rule-based formats.
+ * @stable ICU 3.0
+ */
+ UNUM_LENIENT_PARSE,
+ /* Value 20 is intentionally left unassigned in this header; UNUM_SCALE is pinned to 21. */
+ /**
+ * Scale, which adjusts the position of the
+ * decimal point when formatting. Amounts will be multiplied by 10 ^ (scale)
+ * before they are formatted. The default value for the scale is 0 (no adjustment).
+ *
+ * Example: setting the scale to 3, 123 formats as "123,000"
+ * Example: setting the scale to -4, 123 formats as "0.0123"
+ *
+ * This setting is analogous to getMultiplierScale() and setMultiplierScale() in decimfmt.h.
+ *
+ * @stable ICU 51 */
+ UNUM_SCALE = 21,
+
+ /**
+ * Minimum grouping digits; most commonly set to 2 to print "1000" instead of "1,000".
+ * See DecimalFormat::getMinimumGroupingDigits().
+ *
+ * For better control over grouping strategies, use UNumberFormatter.
+ *
+ * @stable ICU 64
+ */
+ UNUM_MINIMUM_GROUPING_DIGITS = 22,
+
+ /**
+ * if this attribute is set to 0, it is set to UNUM_CURRENCY_STANDARD purpose,
+ * otherwise it is UNUM_CASH_CURRENCY purpose
+ * Default: 0 (UNUM_CURRENCY_STANDARD purpose)
+ * @stable ICU 54
+ */
+ UNUM_CURRENCY_USAGE = 23,
+
+#ifndef U_HIDE_INTERNAL_API
+ /** One below the first bitfield-boolean item.
+ * All items after this one are stored in boolean form.
+ * @internal */
+ UNUM_MAX_NONBOOLEAN_ATTRIBUTE = 0x0FFF,
+#endif /* U_HIDE_INTERNAL_API */
+
+ /** If 1, specifies that if setting the "max integer digits" attribute would truncate a value, set an error status rather than silently truncating.
+ * For example, formatting the value 1234 with 4 max int digits would succeed, but formatting 12345 would fail. There is no effect on parsing.
+ * Default: 0 (not set)
+ * @stable ICU 50
+ */
+ UNUM_FORMAT_FAIL_IF_MORE_THAN_MAX_DIGITS = 0x1000,
+ /**
+ * if this attribute is set to 1, specifies that, if the pattern doesn't contain an exponent, the exponent will not be parsed. If the pattern does contain an exponent, this attribute has no effect.
+ * Has no effect on formatting.
+ * Default: 0 (unset)
+ * @stable ICU 50
+ */
+ UNUM_PARSE_NO_EXPONENT = 0x1001,
+
+ /**
+ * if this attribute is set to 1, specifies that, if the pattern contains a
+ * decimal mark the input is required to have one. If this attribute is set to 0,
+ * specifies that input does not have to contain a decimal mark.
+ * Has no effect on formatting.
+ * Default: 0 (unset)
+ * @stable ICU 54
+ */
+ UNUM_PARSE_DECIMAL_MARK_REQUIRED = 0x1002,
+
+ /**
+ * Parsing: if set to 1, parsing is sensitive to case (lowercase/uppercase).
+ *
+ * @stable ICU 64
+ */
+ UNUM_PARSE_CASE_SENSITIVE = 0x1003,
+
+ /**
+ * Formatting: if set to 1, whether to show the plus sign on non-negative numbers.
+ *
+ * For better control over sign display, use UNumberFormatter.
+ *
+ * @stable ICU 64
+ */
+ UNUM_SIGN_ALWAYS_SHOWN = 0x1004,
+
+#ifndef U_HIDE_INTERNAL_API
+ /** Limit of boolean attributes. (value should
+ * not depend on U_HIDE conditionals)
+ * @internal */
+ UNUM_LIMIT_BOOLEAN_ATTRIBUTE = 0x1005,
+#endif /* U_HIDE_INTERNAL_API */
+
+} UNumberFormatAttribute;
+
+#ifndef U_HIDE_DRAFT_API
+
+#endif // U_HIDE_DRAFT_API
+
+/**
+* Get a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* @param fmt The formatter to query.
+* @param attr The attribute to query; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
+* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
+* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
+* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE,
+* UNUM_SCALE, UNUM_MINIMUM_GROUPING_DIGITS.
+* @return The value of attr, or -1 if the formatter doesn't have the requested attribute. The caller should use unum_hasAttribute() to tell if the attribute
+* is available, rather than relying on this function returning -1.
+* @see unum_hasAttribute
+* @see unum_setAttribute
+* @see unum_getDoubleAttribute
+* @see unum_setDoubleAttribute
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_getAttribute(const UNumberFormat* fmt,
+ UNumberFormatAttribute attr);
+
+/**
+* Set a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce. If the
+* formatter does not understand the attribute, the call is ignored. Rule-based formatters only understand
+* the lenient-parse attribute. The caller can use unum_hasAttribute() to find out if the formatter supports the attribute.
+* @param fmt The formatter to set.
+* @param attr The attribute to set; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED,
+* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS,
+* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER,
+* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE,
+* UNUM_LENIENT_PARSE, UNUM_SCALE, UNUM_MINIMUM_GROUPING_DIGITS.
+* @param newValue The new value of attr.
+* @see unum_hasAttribute
+* @see unum_getAttribute
+* @see unum_getDoubleAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+unum_setAttribute( UNumberFormat* fmt,
+ UNumberFormatAttribute attr,
+ int32_t newValue);
+
+
+/**
+* Get a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* If the formatter does not understand the attribute, -1 is returned. The caller should use unum_hasAttribute()
+* to determine if the attribute is supported, rather than relying on this function returning -1.
+* @param fmt The formatter to query.
+* @param attr The attribute to query; e.g. UNUM_ROUNDING_INCREMENT.
+* @return The value of attr, or -1 if the formatter doesn't understand the attribute.
+* @see unum_hasAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @see unum_setDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_CAPI double U_EXPORT2
+unum_getDoubleAttribute(const UNumberFormat* fmt,
+ UNumberFormatAttribute attr);
+
+/**
+* Set a numeric attribute associated with a UNumberFormat.
+* An example of a numeric attribute is the number of integer digits a formatter will produce.
+* If the formatter does not understand the attribute, this call is ignored. The caller can use
+* unum_hasAttribute() to tell in advance whether the formatter understands the attribute.
+* @param fmt The formatter to set.
+* @param attr The attribute to set; e.g. UNUM_ROUNDING_INCREMENT.
+* @param newValue The new value of attr.
+* @see unum_hasAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @see unum_getDoubleAttribute
+* @see unum_getTextAttribute
+* @see unum_setTextAttribute
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+unum_setDoubleAttribute( UNumberFormat* fmt,
+ UNumberFormatAttribute attr,
+ double newValue);
+
+/** The possible UNumberFormat text attributes @stable ICU 2.0*/
+typedef enum UNumberFormatTextAttribute {
+ /** Positive prefix */
+ UNUM_POSITIVE_PREFIX,
+ /** Positive suffix */
+ UNUM_POSITIVE_SUFFIX,
+ /** Negative prefix */
+ UNUM_NEGATIVE_PREFIX,
+ /** Negative suffix */
+ UNUM_NEGATIVE_SUFFIX,
+ /** The character used to pad to the format width. */
+ UNUM_PADDING_CHARACTER,
+ /** The ISO currency code */
+ UNUM_CURRENCY_CODE,
+ /**
+ * The default rule set, such as "%spellout-numbering-year:", "%spellout-cardinal:",
+ * "%spellout-ordinal-masculine-plural:", "%spellout-ordinal-feminine:", or
+ * "%spellout-ordinal-neuter:". The available public rulesets can be listed using
+ * unum_getTextAttribute with UNUM_PUBLIC_RULESETS. This is only available with
+ * rule-based formatters.
+ * @stable ICU 3.0
+ */
+ UNUM_DEFAULT_RULESET,
+ /**
+ * The public rule sets. This is only available with rule-based formatters.
+ * This is a read-only attribute. The public rulesets are returned as a
+ * single string, with each ruleset name delimited by ';' (semicolon). See the
+ * CLDR LDML spec for more information about RBNF rulesets:
+ * http://www.unicode.org/reports/tr35/tr35-numbers.html#Rule-Based_Number_Formatting
+ * @stable ICU 3.0
+ */
+ UNUM_PUBLIC_RULESETS
+} UNumberFormatTextAttribute;
+
+/**
+* Get a text attribute associated with a UNumberFormat.
+* An example of a text attribute is the suffix for positive numbers. If the formatter
+* does not understand the attribute, U_UNSUPPORTED_ERROR is returned as the status.
+* Rule-based formatters only understand UNUM_DEFAULT_RULESET and UNUM_PUBLIC_RULESETS.
+* @param fmt The formatter to query.
+* @param tag The attribute to query; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
+* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
+* UNUM_DEFAULT_RULESET, or UNUM_PUBLIC_RULESETS.
+* @param result A pointer to a buffer to receive the attribute.
+* @param resultLength The maximum size of result.
+* @param status A pointer to a UErrorCode to receive any errors
+* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_setTextAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_getTextAttribute( const UNumberFormat* fmt,
+ UNumberFormatTextAttribute tag,
+ UChar* result,
+ int32_t resultLength,
+ UErrorCode* status);
+
+/**
+* Set a text attribute associated with a UNumberFormat.
+* An example of a text attribute is the suffix for positive numbers. Rule-based formatters
+* only understand UNUM_DEFAULT_RULESET.
+* @param fmt The formatter to set.
+* @param tag The attribute to set; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX,
+* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE,
+* or UNUM_DEFAULT_RULESET.
+* @param newValue The new value of the attribute selected by tag.
+* @param newValueLength The length of newValue, or -1 if null-terminated.
+* @param status A pointer to a UErrorCode to receive any errors
+* @see unum_getTextAttribute
+* @see unum_getAttribute
+* @see unum_setAttribute
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+unum_setTextAttribute( UNumberFormat* fmt,
+ UNumberFormatTextAttribute tag,
+ const UChar* newValue,
+ int32_t newValueLength,
+ UErrorCode *status);
+
+/**
+ * Extract the pattern from a UNumberFormat. The pattern will follow
+ * the DecimalFormat pattern syntax.
+ * @param fmt The formatter to query.
+ * @param isPatternLocalized true if the pattern should be localized,
+ * false otherwise. This is ignored if the formatter is a rule-based
+ * formatter.
+ * @param result A pointer to a buffer to receive the pattern.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an input-output UErrorCode.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated.
+ * @see unum_applyPattern
+ * @see DecimalFormat
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+unum_toPattern( const UNumberFormat* fmt,
+ UBool isPatternLocalized,
+ UChar* result,
+ int32_t resultLength,
+ UErrorCode* status);
+
+
+/**
+ * Constants for specifying a number format symbol.
+ * @stable ICU 2.0
+ */
+typedef enum UNumberFormatSymbol {
+ /** The decimal separator */
+ UNUM_DECIMAL_SEPARATOR_SYMBOL = 0,
+ /** The grouping separator */
+ UNUM_GROUPING_SEPARATOR_SYMBOL = 1,
+ /** The pattern separator */
+ UNUM_PATTERN_SEPARATOR_SYMBOL = 2,
+ /** The percent sign */
+ UNUM_PERCENT_SYMBOL = 3,
+ /** Zero */
+ UNUM_ZERO_DIGIT_SYMBOL = 4,
+ /** Character representing a digit in the pattern */
+ UNUM_DIGIT_SYMBOL = 5,
+ /** The minus sign */
+ UNUM_MINUS_SIGN_SYMBOL = 6,
+ /** The plus sign */
+ UNUM_PLUS_SIGN_SYMBOL = 7,
+ /** The currency symbol */
+ UNUM_CURRENCY_SYMBOL = 8,
+ /** The international currency symbol */
+ UNUM_INTL_CURRENCY_SYMBOL = 9,
+ /** The monetary separator */
+ UNUM_MONETARY_SEPARATOR_SYMBOL = 10,
+ /** The exponential symbol */
+ UNUM_EXPONENTIAL_SYMBOL = 11,
+ /** Per mill symbol */
+ UNUM_PERMILL_SYMBOL = 12,
+ /** Escape padding character */
+ UNUM_PAD_ESCAPE_SYMBOL = 13,
+ /** Infinity symbol */
+ UNUM_INFINITY_SYMBOL = 14,
+ /** NaN symbol */
+ UNUM_NAN_SYMBOL = 15,
+ /** Significant digit symbol
+ * @stable ICU 3.0 */
+ UNUM_SIGNIFICANT_DIGIT_SYMBOL = 16,
+ /** The monetary grouping separator
+ * @stable ICU 3.6
+ */
+ UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL = 17,
+ /** One
+ * @stable ICU 4.6
+ */
+ UNUM_ONE_DIGIT_SYMBOL = 18,
+ /** Two
+ * @stable ICU 4.6
+ */
+ UNUM_TWO_DIGIT_SYMBOL = 19,
+ /** Three
+ * @stable ICU 4.6
+ */
+ UNUM_THREE_DIGIT_SYMBOL = 20,
+ /** Four
+ * @stable ICU 4.6
+ */
+ UNUM_FOUR_DIGIT_SYMBOL = 21,
+ /** Five
+ * @stable ICU 4.6
+ */
+ UNUM_FIVE_DIGIT_SYMBOL = 22,
+ /** Six
+ * @stable ICU 4.6
+ */
+ UNUM_SIX_DIGIT_SYMBOL = 23,
+ /** Seven
+ * @stable ICU 4.6
+ */
+ UNUM_SEVEN_DIGIT_SYMBOL = 24,
+ /** Eight
+ * @stable ICU 4.6
+ */
+ UNUM_EIGHT_DIGIT_SYMBOL = 25,
+ /** Nine
+ * @stable ICU 4.6
+ */
+ UNUM_NINE_DIGIT_SYMBOL = 26,
+
+ /** Multiplication sign
+ * @stable ICU 54
+ */
+ UNUM_EXPONENT_MULTIPLICATION_SYMBOL = 27,
+
+#ifndef U_HIDE_INTERNAL_API
+ /** Approximately sign.
+ * @internal
+ */
+ UNUM_APPROXIMATELY_SIGN_SYMBOL = 28,
+#endif
+} UNumberFormatSymbol;
+
+/**
+* Get a symbol associated with a UNumberFormat.
+* A UNumberFormat uses symbols to represent the special locale-dependent
+* characters in a number, for example the percent sign. This API is not
+* supported for rule-based formatters.
+* @param fmt The formatter to query.
+* @param symbol The UNumberFormatSymbol constant for the symbol to get
+* @param buffer The string buffer that will receive the symbol string;
+* if it is NULL, then only the length of the symbol is returned
+* @param size The size of the string buffer
+* @param status A pointer to a UErrorCode to receive any errors
+* @return The length of the symbol; the buffer is not modified if
+* length >= size
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_getSymbol(const UNumberFormat *fmt,
+ UNumberFormatSymbol symbol,
+ UChar *buffer,
+ int32_t size,
+ UErrorCode *status);
+
+/**
+ * An enum declaring how to render units, including currencies.
+ *
+ * This enum is similar to {@link UMeasureFormatWidth}.
+ *
+ * @stable ICU 60
+ */
+typedef enum UNumberUnitWidth {
+ /**
+ * Print an abbreviated version of the unit name. Similar to SHORT, but always use the shortest available
+ * abbreviation or symbol. This option can be used when the context hints at the identity of the unit. For more
+ * information on the difference between NARROW and SHORT, see SHORT.
+ *
+ *
+ * In CLDR, this option corresponds to the "Narrow" format for measure units and the "¤¤¤¤¤" placeholder for
+ * currencies.
+ *
+ * @stable ICU 60
+ */
+ UNUM_UNIT_WIDTH_NARROW = 0,
+
+ /**
+ * Print an abbreviated version of the unit name. Similar to NARROW, but use a slightly wider abbreviation or
+ * symbol when there may be ambiguity. This is the default behavior.
+ *
+ *
+ * For example, in es-US, the SHORT form for Fahrenheit is "{0} °F", but the NARROW form is "{0}°",
+ * since Fahrenheit is the customary unit for temperature in that locale.
+ *
+ *
+ * In CLDR, this option corresponds to the "Short" format for measure units and the "¤" placeholder for
+ * currencies.
+ *
+ * @stable ICU 60
+ */
+ UNUM_UNIT_WIDTH_SHORT = 1,
+
+ /**
+ * Print the full name of the unit, without any abbreviations.
+ *
+ *
+ * In CLDR, this option corresponds to the default format for measure units and the "¤¤¤" placeholder for
+ * currencies.
+ *
+ * @stable ICU 60
+ */
+ UNUM_UNIT_WIDTH_FULL_NAME = 2,
+
+ /**
+ * Use the three-letter ISO currency code (for example, "USD") in place of the symbol for displaying currencies. The behavior of this
+ * option is currently undefined for use with measure units.
+ *
+ *
+ * In CLDR, this option corresponds to the "¤¤" placeholder for currencies.
+ *
+ * @stable ICU 60
+ */
+ UNUM_UNIT_WIDTH_ISO_CODE = 3,
+
+ /**
+ * Use the formal variant of the currency symbol; for example, "NT$" for the New Taiwan
+ * dollar in zh-TW.
+ *
+ *
+ * Behavior of this option with non-currency units is not defined at this time.
+ *
+ * @stable ICU 68
+ */
+ UNUM_UNIT_WIDTH_FORMAL = 4,
+
+ /**
+ * Use the alternate variant of the currency symbol; for example, "TL" for the Turkish
+ * lira (TRY).
+ *
+ *
+ * Behavior of this option with non-currency units is not defined at this time.
+ *
+ * @stable ICU 68
+ */
+ UNUM_UNIT_WIDTH_VARIANT = 5,
+
+ /**
+ * Format the number according to the specified unit, but do not display the unit. For currencies, apply
+ * monetary symbols and formats as with SHORT, but omit the currency symbol. For measure units, the behavior is
+ * equivalent to not specifying the unit at all.
+ *
+ * @stable ICU 60
+ */
+ UNUM_UNIT_WIDTH_HIDDEN = 6,
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // needed for unconditionalized struct MacroProps
+ /**
+ * One more than the highest UNumberUnitWidth value.
+ *
+ * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420.
+ */
+ UNUM_UNIT_WIDTH_COUNT = 7
+} UNumberUnitWidth;
+
+/**
+ * An enum declaring the strategy for when and how to display grouping separators (i.e., the
+ * separator, often a comma or period, after every 2-3 powers of ten). The choices are several
+ * pre-built strategies for different use cases that employ locale data whenever possible. Example
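+ * outputs for 1234 and 1234567 in en-IN:
+ *
+ * - OFF: 1234 and 1234567
+ * - MIN2: 1234 and 12,34,567
+ * - AUTO: 1,234 and 12,34,567
+ * - ON_ALIGNED: 1,234 and 12,34,567
+ * - THOUSANDS: 1,234 and 1,234,567
+ *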
+ * The default is AUTO, which displays grouping separators unless the locale data says that grouping
+ * is not customary. To force grouping for all numbers greater than 1000 consistently across locales,
+ * use ON_ALIGNED. On the other hand, to display grouping less frequently than the default, use MIN2
+ * or OFF. See the docs of each option for details.
+ *
+ *
+ * Note: This enum specifies the strategy for grouping sizes. To set which character to use as the
+ * grouping separator, use the "symbols" setter.
+ *
+ * @stable ICU 63
+ */
+typedef enum UNumberGroupingStrategy {
+ /**
+ * Do not display grouping separators in any locale.
+ *
+ * @stable ICU 61
+ */
+ UNUM_GROUPING_OFF,
+
+ /**
+ * Display grouping using locale defaults, except do not show grouping on values smaller than
+ * 10000 (such that there is a minimum of two digits before the first separator).
+ *
+ *
+ * Note that locales may restrict grouping separators to be displayed only on 1 million or
+ * greater (for example, ee and hu) or disable grouping altogether (for example, bg currency).
+ *
+ *
+ * Locale data is used to determine whether to separate larger numbers into groups of 2
+ * (customary in South Asia) or groups of 3 (customary in Europe and the Americas).
+ *
+ * @stable ICU 61
+ */
+ UNUM_GROUPING_MIN2,
+
+ /**
+ * Display grouping using the default strategy for all locales. This is the default behavior.
+ *
+ *
+ * Note that locales may restrict grouping separators to be displayed only on 1 million or
+ * greater (for example, ee and hu) or disable grouping altogether (for example, bg currency).
+ *
+ *
+ * Locale data is used to determine whether to separate larger numbers into groups of 2
+ * (customary in South Asia) or groups of 3 (customary in Europe and the Americas).
+ *
+ * @stable ICU 61
+ */
+ UNUM_GROUPING_AUTO,
+
+ /**
+ * Always display the grouping separator on values of at least 1000.
+ *
+ *
+ * This option ignores the locale data that restricts or disables grouping, described in MIN2 and
+ * AUTO. This option may be useful to normalize the alignment of numbers, such as in a
+ * spreadsheet.
+ *
+ *
+ * Locale data is used to determine whether to separate larger numbers into groups of 2
+ * (customary in South Asia) or groups of 3 (customary in Europe and the Americas).
+ *
+ * @stable ICU 61
+ */
+ UNUM_GROUPING_ON_ALIGNED,
+
+ /**
+ * Use the Western defaults: groups of 3 and enabled for all numbers 1000 or greater. Do not use
+ * locale data for determining the grouping strategy.
+ *
+ * @stable ICU 61
+ */
+ UNUM_GROUPING_THOUSANDS
+
+#ifndef U_HIDE_INTERNAL_API
+ ,
+ /**
+ * One more than the highest UNumberGroupingStrategy value.
+ *
+ * @internal ICU 62: The numeric value may change over time; see ICU ticket #12420.
+ */
+ UNUM_GROUPING_COUNT
+#endif /* U_HIDE_INTERNAL_API */
+
+} UNumberGroupingStrategy;
+
+/**
+ * An enum declaring how to denote positive and negative numbers. Example outputs when formatting
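+ * 123, 0, and -123 in en-US:
+ *
+ * - AUTO: "123", "0", and "-123"
+ * - ALWAYS: "+123", "+0", and "-123"
+ * - NEVER: "123", "0", and "123"
+ * - ACCOUNTING: "$123", "$0", and "($123)"
+ * - ACCOUNTING_ALWAYS: "+$123", "+$0", and "($123)"
+ * - EXCEPT_ZERO: "+123", "0", and "-123"
+ * - ACCOUNTING_EXCEPT_ZERO: "+$123", "$0", and "($123)"
+ *
+ * The exact format, including the position and the code point of the sign, differs by locale.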
+ *
+ * @stable ICU 60
+ */
+typedef enum UNumberSignDisplay {
+ /**
+ * Show the minus sign on negative numbers, and do not show the sign on positive numbers. This is the default
+ * behavior.
+ *
+ * If using this option, a sign will be displayed on negative zero, including negative numbers
+ * that round to zero. To hide the sign on negative zero, use the NEGATIVE option.
+ *
+ * @stable ICU 60
+ */
+ UNUM_SIGN_AUTO,
+
+ /**
+ * Show the minus sign on negative numbers and the plus sign on positive numbers, including zero.
+ * To hide the sign on zero, see {@link UNUM_SIGN_EXCEPT_ZERO}.
+ *
+ * @stable ICU 60
+ */
+ UNUM_SIGN_ALWAYS,
+
+ /**
+ * Do not show the sign on positive or negative numbers.
+ *
+ * @stable ICU 60
+ */
+ UNUM_SIGN_NEVER,
+
+ /**
+ * Use the locale-dependent accounting format on negative numbers, and do not show the sign on positive numbers.
+ *
+ *
+ * The accounting format is defined in CLDR and varies by locale; in many Western locales, the format is a pair
+ * of parentheses around the number.
+ *
+ *
+ * Note: Since CLDR defines the accounting format in the monetary context only, this option falls back to the
+ * AUTO sign display strategy when formatting without a currency unit. This limitation may be lifted in the
+ * future.
+ *
+ * @stable ICU 60
+ */
+ UNUM_SIGN_ACCOUNTING,
+
+ /**
+ * Use the locale-dependent accounting format on negative numbers, and show the plus sign on
+ * positive numbers, including zero. For more information on the accounting format, see the
+ * ACCOUNTING sign display strategy. To hide the sign on zero, see
+ * {@link UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO}.
+ *
+ * @stable ICU 60
+ */
+ UNUM_SIGN_ACCOUNTING_ALWAYS,
+
+ /**
+ * Show the minus sign on negative numbers and the plus sign on positive numbers. Do not show a
+ * sign on zero, numbers that round to zero, or NaN.
+ *
+ * @stable ICU 61
+ */
+ UNUM_SIGN_EXCEPT_ZERO,
+
+ /**
+ * Use the locale-dependent accounting format on negative numbers, and show the plus sign on
+ * positive numbers. Do not show a sign on zero, numbers that round to zero, or NaN. For more
+ * information on the accounting format, see the ACCOUNTING sign display strategy.
+ *
+ * @stable ICU 61
+ */
+ UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO,
+
+ /**
+ * Same as AUTO, but do not show the sign on negative zero.
+ *
+ * @stable ICU 69
+ */
+ UNUM_SIGN_NEGATIVE,
+
+ /**
+ * Same as ACCOUNTING, but do not show the sign on negative zero.
+ *
+ * @stable ICU 69
+ */
+ UNUM_SIGN_ACCOUNTING_NEGATIVE,
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // needed for unconditionalized struct MacroProps
+ /**
+ * One more than the highest UNumberSignDisplay value.
+ *
+ * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420.
+ */
+ UNUM_SIGN_COUNT = 9,
+} UNumberSignDisplay;
+
+/**
+ * Gets the script codes associated with the given locale, ISO 15924 code,
+ * or script name, and writes them into the fillIn buffer.
+ *
+ * Note: To search by short or long script alias only, use
+ * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
+ * a fast lookup with no access of the locale data.
+ *
+ * @param nameOrAbbrOrLocale name of the script, as given in
+ * PropertyValueAliases.txt, or ISO 15924 code or locale
+ * @param fillIn the UScriptCode buffer to fill in the script code
+ * @param capacity the capacity (size) of UScriptCode buffer passed in.
+ * @param err the error status code.
+ * @return The number of script codes filled in the buffer passed in
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
+
+/**
+ * Returns the long Unicode script name, if there is one.
+ * Otherwise returns the 4-letter ISO 15924 script code.
+ * Returns "Malayalam" given USCRIPT_MALAYALAM.
+ *
+ * @param scriptCode UScriptCode enum
+ * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
+ * or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode);
+
+/**
+ * Returns the 4-letter ISO 15924 script code,
+ * which is the same as the short Unicode script name if Unicode has names for the script.
+ * Returns "Mlym" given USCRIPT_MALAYALAM.
+ *
+ * @param scriptCode UScriptCode enum
+ * @return short script name (4-letter code), or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode);
+
+/**
+ * Gets the script code associated with the given codepoint.
+ * Returns USCRIPT_MALAYALAM given 0x0D02
+ * @param codepoint UChar32 codepoint
+ * @param err the error status code.
+ * @return The UScriptCode, or 0 if codepoint is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI UScriptCode U_EXPORT2
+uscript_getScript(UChar32 codepoint, UErrorCode *err);
+
+/**
+ * Do the Script_Extensions of code point c contain script sc?
+ * If c does not have explicit Script_Extensions, then this tests whether
+ * c has the Script property value sc.
+ *
+ * Some characters are commonly used in multiple scripts.
+ * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ * @param c code point
+ * @param sc script code
+ * @return true if sc is in Script_Extensions(c)
+ * @stable ICU 49
+ */
+U_CAPI UBool U_EXPORT2
+uscript_hasScript(UChar32 c, UScriptCode sc);
+
+/**
+ * Writes code point c's Script_Extensions as a list of UScriptCode values
+ * to the output scripts array and returns the number of script codes.
+ * - If c does have Script_Extensions, then the Script property value
+ * (normally Common or Inherited) is not included.
+ * - If c does not have Script_Extensions, then the one Script code is written to the output array.
+ * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
+ * In other words, if the return value is 1,
+ * then the output array contains exactly c's single Script code.
+ * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
+ *
+ * Some characters are commonly used in multiple scripts.
+ * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ *
+ * If there are more than capacity script codes to be written, then
+ * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
+ * (Usual ICU buffer handling behavior.)
+ *
+ * @param c code point
+ * @param scripts output script code array
+ * @param capacity capacity of the scripts array
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
+ * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getScriptExtensions(UChar32 c,
+ UScriptCode *scripts, int32_t capacity,
+ UErrorCode *errorCode);
+
+/**
+ * Script usage constants.
+ * See UAX #31 Unicode Identifier and Pattern Syntax.
+ * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
+ *
+ * @stable ICU 51
+ */
+typedef enum UScriptUsage {
+ /** Not encoded in Unicode. @stable ICU 51 */
+ USCRIPT_USAGE_NOT_ENCODED,
+ /** Unknown script usage. @stable ICU 51 */
+ USCRIPT_USAGE_UNKNOWN,
+ /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
+ USCRIPT_USAGE_EXCLUDED,
+ /** Limited Use script. @stable ICU 51 */
+ USCRIPT_USAGE_LIMITED_USE,
+ /** Aspirational Use script. @stable ICU 51 */
+ USCRIPT_USAGE_ASPIRATIONAL,
+ /** Recommended script. @stable ICU 51 */
+ USCRIPT_USAGE_RECOMMENDED
+} UScriptUsage;
+
+/**
+ * Writes the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @param dest output string array
+ * @param capacity number of UChars in the dest array
+ * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
+ * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
+ * @stable ICU 51
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+class UnicodeString;
+U_NAMESPACE_END
+
+/**
+ * Returns the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @return the sample character string
+ * @stable ICU 51
+ */
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script);
+
+#endif
+
+/**
+ * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
+ * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
+ *
+ * @param script script code
+ * @return script usage
+ * @see UScriptUsage
+ * @stable ICU 51
+ */
+U_CAPI UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script);
+
+/**
+ * Returns true if the script is written right-to-left.
+ * For example, Arab and Hebr.
+ *
+ * @param script script code
+ * @return true if the script is right-to-left
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script);
+
+/**
+ * Returns true if the script allows line breaks between letters (excluding hyphenation).
+ * Such a script typically requires dictionary-based line breaking.
+ * For example, Hani and Thai.
+ *
+ * @param script script code
+ * @return true if the script allows line breaks between letters
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script);
+
+/**
+ * Returns true if case distinctions are customary in modern (or most recent) usage of the script.
+ * For example, Latn and Cyrl.
+ *
+ * @param script script code
+ * @return true if the script is cased
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isCased(UScriptCode script);
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/uset.h b/third_party/icu4c/ndk_headers/unicode/uset.h
new file mode 100644
index 00000000000..5fd3361fc44
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/uset.h
@@ -0,0 +1,524 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar07
+* created by: Markus W. Scherer
+*
+* C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * This is a C wrapper around the C++ UnicodeSet class. After a transliteration operation, some of the indices in this
+ * structure will be modified. See the field descriptions for
+ * details.
+ *
+ * contextStart <= start <= limit <= contextLimit
+ *
+ * Note: All index values in this structure must be at code point
+ * boundaries. That is, none of them may occur between two code units
+ * of a surrogate pair. If any index does split a surrogate pair,
+ * results are unspecified.
+ *
+ * @stable ICU 2.0
+ */
+typedef struct UTransPosition {
+
+ /**
+ * Beginning index, inclusive, of the context to be considered for
+ * a transliteration operation. The transliterator will ignore
+ * anything before this index. INPUT/OUTPUT parameter: This parameter
+ * is updated by a transliteration operation to reflect the maximum
+ * amount of antecontext needed by a transliterator.
+ * @stable ICU 2.4
+ */
+ int32_t contextStart;
+
+ /**
+ * Ending index, exclusive, of the context to be considered for a
+ * transliteration operation. The transliterator will ignore
+ * anything at or after this index. INPUT/OUTPUT parameter: This
+ * parameter is updated to reflect changes in the length of the
+ * text, but points to the same logical position in the text.
+ * @stable ICU 2.4
+ */
+ int32_t contextLimit;
+
+ /**
+ * Beginning index, inclusive, of the text to be transliterated.
+ * INPUT/OUTPUT parameter: This parameter is advanced past
+ * characters that have already been transliterated by a
+ * transliteration operation.
+ * @stable ICU 2.4
+ */
+ int32_t start;
+
+ /**
+ * Ending index, exclusive, of the text to be transliterated.
+ * INPUT/OUTPUT parameter: This parameter is updated to reflect
+ * changes in the length of the text, but points to the same
+ * logical position in the text.
+ * @stable ICU 2.4
+ */
+ int32_t limit;
+
+} UTransPosition;
+
+/********************************************************************
+ * General API
+ ********************************************************************/
+
+/**
+ * Open a custom transliterator, given a custom rules string
+ * OR
+ * a system transliterator, given its ID.
+ * Any non-NULL result from this function should later be closed with
+ * utrans_close().
+ *
+ * @param id a valid transliterator ID
+ * @param idLength the length of the ID string, or -1 if NUL-terminated
+ * @param dir the desired direction
+ * @param rules the transliterator rules. See the C++ header rbt.h for
+ * rules syntax. If NULL then a system transliterator matching
+ * the ID is returned.
+ * @param rulesLength the length of the rules, or -1 if the rules
+ * are NUL-terminated.
+ * @param parseError a pointer to a UParseError struct to receive the details
+ * of any parsing errors. This parameter may be NULL if no
+ * parsing error details are desired.
+ * @param pErrorCode a pointer to the UErrorCode
+ * @return a transliterator pointer that may be passed to other
+ * utrans_xxx() functions, or NULL if the open call fails.
+ * @stable ICU 2.8
+ */
+U_CAPI UTransliterator* U_EXPORT2
+utrans_openU(const UChar *id,
+ int32_t idLength,
+ UTransDirection dir,
+ const UChar *rules,
+ int32_t rulesLength,
+ UParseError *parseError,
+ UErrorCode *pErrorCode);
+
+/**
+ * Open an inverse of an existing transliterator. For this to work,
+ * the inverse must be registered with the system. For example, if
+ * the Transliterator "A-B" is opened, and then its inverse is opened,
+ * the result is the Transliterator "B-A", if such a transliterator is
+ * registered with the system. Otherwise the result is NULL and a
+ * failing UErrorCode is set. Any non-NULL result from this function
+ * should later be closed with utrans_close().
+ *
+ * @param trans the transliterator to open the inverse of.
+ * @param status a pointer to the UErrorCode
+ * @return a pointer to a newly-opened transliterator that is the
+ * inverse of trans, or NULL if the open call fails.
+ * @stable ICU 2.0
+ */
+U_CAPI UTransliterator* U_EXPORT2
+utrans_openInverse(const UTransliterator* trans,
+ UErrorCode* status);
+
+/**
+ * Create a copy of a transliterator. Any non-NULL result from this
+ * function should later be closed with utrans_close().
+ *
+ * @param trans the transliterator to be copied.
+ * @param status a pointer to the UErrorCode
+ * @return a transliterator pointer that may be passed to other
+ * utrans_xxx() functions, or NULL if the clone call fails.
+ * @stable ICU 2.0
+ */
+U_CAPI UTransliterator* U_EXPORT2
+utrans_clone(const UTransliterator* trans,
+ UErrorCode* status);
+
+/**
+ * Close a transliterator. Any non-NULL pointer returned by
+ * utrans_openXxx() or utrans_clone() should eventually be closed.
+ * @param trans the transliterator to be closed.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+utrans_close(UTransliterator* trans);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUTransliteratorPointer
+ * "Smart pointer" class, closes a UTransliterator via utrans_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUTransliteratorPointer, UTransliterator, utrans_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Return the programmatic identifier for this transliterator.
+ * If this identifier is passed to utrans_openU(), it will open
+ * a transliterator equivalent to this one, if the ID has been
+ * registered.
+ *
+ * @param trans the transliterator to return the ID of.
+ * @param resultLength pointer to an output variable receiving the length
+ * of the ID string; can be NULL
+ * @return the NUL-terminated ID string. This pointer remains
+ * valid until utrans_close() is called on this transliterator.
+ *
+ * @stable ICU 2.8
+ */
+U_CAPI const UChar * U_EXPORT2
+utrans_getUnicodeID(const UTransliterator *trans,
+ int32_t *resultLength);
+
+/**
+ * Register an open transliterator with the system. When
+ * utrans_openU() is called with an ID string that is equal to that
+ * returned by utrans_getUnicodeID(adoptedTrans,...), then
+ * utrans_clone(adoptedTrans,...) is returned.
+ *
+ * NOTE: After this call the system owns the adoptedTrans and will
+ * close it. The user must not call utrans_close() on adoptedTrans.
+ *
+ * @param adoptedTrans a transliterator, typically the result of
+ * utrans_openU() with a rules string, to be registered with the system.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+utrans_register(UTransliterator* adoptedTrans,
+ UErrorCode* status);
+
+/**
+ * Unregister a transliterator from the system. After this call the
+ * system will no longer recognize the given ID when passed to
+ * utrans_openU(). If the ID is invalid then nothing is done.
+ *
+ * @param id an ID to unregister
+ * @param idLength the length of id, or -1 if id is zero-terminated
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+utrans_unregisterID(const UChar* id, int32_t idLength);
+
+/**
+ * Set the filter used by a transliterator. A filter can be used to
+ * make the transliterator pass certain characters through untouched.
+ * The filter is expressed using a UnicodeSet pattern. If the
+ * filterPattern is NULL or the empty string, then the transliterator
+ * will be reset to use no filter.
+ *
+ * @param trans the transliterator
+ * @param filterPattern a pattern string, in the form accepted by
+ * UnicodeSet, specifying which characters to apply the
+ * transliteration to. May be NULL or the empty string to indicate no
+ * filter.
+ * @param filterPatternLen the length of filterPattern, or -1 if
+ * filterPattern is zero-terminated
+ * @param status a pointer to the UErrorCode
+ * @see UnicodeSet
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+utrans_setFilter(UTransliterator* trans,
+ const UChar* filterPattern,
+ int32_t filterPatternLen,
+ UErrorCode* status);
+
+/**
+ * Return the number of system transliterators.
+ * It is recommended to use utrans_openIDs() instead.
+ *
+ * @return the number of system transliterators.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+utrans_countAvailableIDs(void);
+
+/**
+ * Return a UEnumeration for the available transliterators.
+ *
+ * @param pErrorCode Pointer to the UErrorCode in/out parameter.
+ * @return UEnumeration for the available transliterators.
+ * Close with uenum_close().
+ *
+ * @stable ICU 2.8
+ */
+U_CAPI UEnumeration * U_EXPORT2
+utrans_openIDs(UErrorCode *pErrorCode);
+
+/********************************************************************
+ * Transliteration API
+ ********************************************************************/
+
+/**
+ * Transliterate a segment of a UReplaceable string. The string is
+ * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks
+ * function pointer struct repFunc. Functions in the repFunc struct
+ * will be called in order to modify the rep string.
+ *
+ * @param trans the transliterator
+ * @param rep a pointer to the string. This will be passed to the
+ * repFunc functions.
+ * @param repFunc a set of function pointers that will be used to
+ * modify the string pointed to by rep.
+ * @param start the beginning index, inclusive; Upon return, values in Typical usage of this method begins with an initial call
+ * with This method assumes that future calls may be made that will
+ * insert new text into the buffer. As a result, it only performs
+ * unambiguous transliterations. After the last call to this method,
+ * there may be untransliterated text that is waiting for more input
+ * to resolve an ambiguity. In order to perform these pending
+ * transliterations, clients should call utrans_trans() with a start
+ * of index.start and a limit of index.end after the last call to this
+ * method has been made.
+ *
+ * @param trans the transliterator
+ * @param rep a pointer to the string. This will be passed to the
+ * repFunc functions.
+ * @param repFunc a set of function pointers that will be used to
+ * modify the string pointed to by rep.
+ * @param pos a struct containing the start and limit indices of the
+ * text to be read and the text to be transliterated
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+utrans_transIncremental(const UTransliterator* trans,
+ UReplaceable* rep,
+ const UReplaceableCallbacks* repFunc,
+ UTransPosition* pos,
+ UErrorCode* status);
+
+/**
+ * Transliterate a segment of a UChar* string. The string is passed
+ * in in a UChar* buffer. The string is modified in place. If the
+ * result is longer than textCapacity, it is truncated. The actual
+ * length of the result is returned in *textLength, if textLength is
+ * non-NULL. *textLength may be greater than textCapacity, but only
+ * textCapacity UChars will be written to *text, including the zero
+ * terminator.
+ *
+ * @param trans the transliterator
+ * @param text a pointer to a buffer containing the text to be
+ * transliterated on input and the result text on output.
+ * @param textLength a pointer to the length of the string in text.
+ * If the length is -1 then the string is assumed to be
+ * zero-terminated. Upon return, the new length is stored in
+ * *textLength. If textLength is NULL then the string is assumed to
+ * be zero-terminated.
+ * @param textCapacity the length of the text buffer
+ * @param start the beginning index, inclusive; Upon return, values in Typical usage of this method begins with an initial call
- * with This method assumes that future calls may be made that will
- * insert new text into the buffer. As a result, it only performs
- * unambiguous transliterations. After the last call to this method,
- * there may be untransliterated text that is waiting for more input
- * to resolve an ambiguity. In order to perform these pending
- * transliterations, clients should call utrans_trans() with a start
- * of index.start and a limit of index.end after the last call to this
- * method has been made.
- *
- * @param trans the transliterator
- * @param rep a pointer to the string. This will be passed to the
- * repFunc functions.
- * @param repFunc a set of function pointers that will be used to
- * modify the string pointed to by rep.
- * @param pos a struct containing the start and limit indices of the
- * text to be read and the text to be transliterated
- * @param status a pointer to the UErrorCode
- * @stable ICU 2.0
- */
-U_CAPI void U_EXPORT2
-utrans_transIncremental(const UTransliterator* trans,
- UReplaceable* rep,
- const UReplaceableCallbacks* repFunc,
- UTransPosition* pos,
- UErrorCode* status);
-
/**
* Transliterate a segment of a UChar* string. The string is passed
* in in a UChar* buffer. The string is modified in place. If the
--
Gitee
From 7c4c54e6c299863c574c1bc43c8924d744a53932 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=BD=97=E5=8D=9A=E6=98=8E?= pParaBiDi
is also copied,
+ * and start
is added to it so that it points to the beginning of the
+ * line for this object.
+ *
+ * @param pParaBiDi is the parent paragraph object. It must have been set
+ * by a successful call to ubidi_setPara.
+ *
+ * @param start is the line's first index into the text.
+ *
+ * @param limit is just behind the line's last index into the text
+ * (its last index +1).
+ * It must be 0<=start
, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ * It must be UBiDi
object.
+ *
+ * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
+ * or <code>UBIDI_MIXED</code>
+ * that indicates if the entire text
+ * represented by this object is unidirectional,
+ * and which direction, or if it is mixed-directional.
+ * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method.
+ *
+ * @see UBiDiDirection
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getDirection(const UBiDi *pBiDi);
+
+/**
+ * Gets the base direction of the text provided according
+ * to the Unicode Bidirectional Algorithm. The base direction
+ * is derived from the first character in the string with bidirectional
+ * character type L, R, or AL. If the first such character has type L,
+ * <code>UBIDI_LTR</code> is returned. If the first such character has
+ * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does
+ * not contain any character of these types, then
+ * <code>UBIDI_NEUTRAL</code> is returned.
+ *
+ * This is a lightweight function for use when only the base direction
+ * is needed and no further bidi processing of the text is needed.
+ *
+ * @param text is a pointer to the text whose base
+ * direction is needed.
+ * Note: the text must be (at least) @c length long.
+ *
+ * @param length is the length of the text;
+ * if <code>length==-1</code> then the text
+ * must be zero-terminated.
+ *
+ * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>,
+ * <code>UBIDI_NEUTRAL</code>
+ *
+ * @see UBiDiDirection
+ * @stable ICU 4.6
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getBaseDirection(const UChar *text, int32_t length );
+
+/**
+ * Get the pointer to the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The pointer to the text that the UBiDi object was created for.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar * U_EXPORT2
+ubidi_getText(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The length of the text that the UBiDi object was created for.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLength(const UBiDi *pBiDi);
+
+/**
+ * Get the paragraph level of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The paragraph level. If there are multiple paragraphs, their
+ * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph
+ * is returned.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getParagraph
+ * @see ubidi_getParagraphByIndex
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getParaLevel(const UBiDi *pBiDi);
+
+/**
+ * Get the number of paragraphs.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The number of paragraphs.
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_countParagraphs(UBiDi *pBiDi);
+
+/**
+ * Get a paragraph, given a position within the text.
+ * This function returns information about a paragraph.
+ * Note: if the paragraph index is known, it is more efficient to
+ * retrieve the paragraph information using ubidi_getParagraphByIndex().
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex is the index of a character within the text, in the
+ * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * <code>charIndex</code>.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of the paragraph containing the specified position.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
+ int32_t *pParaLimit, UBiDiLevel *pParaLevel,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get a paragraph, given the index of this paragraph.
+ *
+ * This function returns information about a paragraph.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param paraIndex is the number of the paragraph, in the
+ * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
+ int32_t *pParaStart, int32_t *pParaLimit,
+ UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
+
+/**
+ * Get the level for one character.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex the index of a character. It must be in the range
+ * [0..ubidi_getProcessedLength(pBiDi)].
+ *
+ * @return The level for the character at charIndex (0 if charIndex is not
+ * in the valid range).
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
+
+/**
+ * Get an array of levels for each character.ubidi_getLevelAt()
.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
+ * text length must be strictly positive.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The levels array for the text,
+ * or <code>NULL</code> if an error occurs.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI const UBiDiLevel * U_EXPORT2
+ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical run.
+ * This function returns information about a run and is used
+ * to retrieve runs in logical order.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalPosition is a logical position within the source text.
+ *
+ * @param pLogicalLimit will receive the limit of the corresponding run.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * <code>logicalPosition</code>.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pLevel will receive the level of the corresponding run.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
+ int32_t *pLogicalLimit, UBiDiLevel *pLevel);
+
+/**
+ * Get the number of runs.
+ * This function may invoke the actual reordering on the
+ * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
+ * may have resolved only the levels of the text. Therefore,
+ * <code>ubidi_countRuns()</code> may have to allocate memory,
+ * and may fail doing so.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The number of runs.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get one run's logical start, length, and directionality,
+ * which can be 0 for LTR or 1 for RTL.
+ * In an RTL run, the character at the logical start is
+ * visually on the right of the displayed run.
+ * The length is the number of characters in the run.
+ * <code>ubidi_countRuns()</code> should be called
+ * before the runs are retrieved.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param runIndex is the number of the run in visual order, in the
+ * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
+ *
+ * @param pLogicalStart is the first logical character index in the text.
+ * The pointer may be <code>NULL</code> if this index is not needed.
+ *
+ * @param pLength is the number of characters (at least one) in the run.
+ * The pointer may be <code>NULL</code> if this is not needed.
+ *
+ * @return the directionality of the run,
+ * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
+ * never <code>UBIDI_MIXED</code>,
+ * never <code>UBIDI_NEUTRAL</code>.
+ *
+ * @see ubidi_countRuns
+ *
+ * Example:
+ *
+ * \code
+ * int32_t i, count=ubidi_countRuns(pBiDi),
+ * logicalStart, visualIndex=0, length;
+ * for(i=0; i
+ *
+ * Note that in right-to-left runs, code like this places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
+ * to avoid these issues.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
+ int32_t *pLogicalStart, int32_t *pLength);
+
+/**
+ * Get the visual position from a logical text position.
+ * If such a mapping is used many times on the same
+ * UBiDi
object, then calling
+ * ubidi_getLogicalMap()
is more efficient.#UBIDI_MAP_NOWHERE
if there is no
+ * visual position because the corresponding text character is a Bidi control
+ * removed from output by the option #UBIDI_OPTION_REMOVE_CONTROLS
.
+ * ubidi_writeReordered()
such as UBIDI_INSERT_LRM_FOR_NUMERIC
,
+ * UBIDI_KEEP_BASE_COMBINING
, UBIDI_OUTPUT_REVERSE
,
+ * UBIDI_REMOVE_BIDI_CONTROLS
, the visual position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as UBIDI_OPTION_INSERT_MARKS
and UBIDI_OPTION_REMOVE_CONTROLS
.
+ * <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalIndex is the index of a character in the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The visual position of this character.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get the logical text position from a visual position.
+ * If such a mapping is used many times on the same
+ * UBiDi
object, then calling
+ * ubidi_getVisualMap()
is more efficient.#UBIDI_MAP_NOWHERE
if there is no
+ * logical position because the corresponding text character is a Bidi mark
+ * inserted in the output by option #UBIDI_OPTION_INSERT_MARKS
.
+ * ubidi_getVisualIndex()
.
+ * ubidi_writeReordered()
such as UBIDI_INSERT_LRM_FOR_NUMERIC
,
+ * UBIDI_KEEP_BASE_COMBINING
, UBIDI_OUTPUT_REVERSE
,
+ * UBIDI_REMOVE_BIDI_CONTROLS
, the logical position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as UBIDI_OPTION_INSERT_MARKS
and UBIDI_OPTION_REMOVE_CONTROLS
.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param visualIndex is the visual position of a character.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of this character in the text.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical-to-visual index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * #UBIDI_MAP_NOWHERE
if the
+ * corresponding text characters are Bidi controls removed from the visual
+ * output by the option #UBIDI_OPTION_REMOVE_CONTROLS
.
+ * ubidi_writeReordered()
such as UBIDI_INSERT_LRM_FOR_NUMERIC
,
+ * UBIDI_KEEP_BASE_COMBINING
, UBIDI_OUTPUT_REVERSE
,
+ * UBIDI_REMOVE_BIDI_CONTROLS
, the visual positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as UBIDI_OPTION_INSERT_MARKS
and UBIDI_OPTION_REMOVE_CONTROLS
.
+ * <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
+ * indexes which will reflect the reordering of the characters.
+ * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
+ * of elements allocated in <code>indexMap</code> must be no less than
+ * <code>ubidi_getResultLength()</code>.
+ * The array does not need to be initialized.
+ * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * Get a visual-to-logical index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * #UBIDI_MAP_NOWHERE
if the
+ * corresponding text characters are Bidi marks inserted in the visual output
+ * by the option #UBIDI_OPTION_INSERT_MARKS
.
+ * ubidi_writeReordered()
such as UBIDI_INSERT_LRM_FOR_NUMERIC
,
+ * UBIDI_KEEP_BASE_COMBINING
, UBIDI_OUTPUT_REVERSE
,
+ * UBIDI_REMOVE_BIDI_CONTROLS
, the logical positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as UBIDI_OPTION_INSERT_MARKS
and UBIDI_OPTION_REMOVE_CONTROLS
.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
+ * indexes which will reflect the reordering of the characters.
+ * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
+ * of elements allocated in <code>indexMap</code> must be no less than
+ * <code>ubidi_getProcessedLength()</code>.
+ * The array does not need to be initialized.
+ * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized.
+ * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized.
+ * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * Invert an index map.
+ * The index mapping of the first map is inverted and written to
+ * the second one.
+ *
+ * @param srcMap is an array with <code>length</code> elements
+ * which defines the original mapping from a source array containing
+ * <code>length</code> elements to a destination array.
+ * Some elements of the source array may have no mapping in the
+ * destination array. In that case, their value will be
+ * the special value <code>UBIDI_MAP_NOWHERE</code>.
+ * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
+ * Some elements may have a value >= <code>length</code>, if the
+ * destination array has more elements than the source array.
+ * There must be no duplicate indexes (two or more elements with the
+ * same value except <code>UBIDI_MAP_NOWHERE</code>).
+ *
+ * @param destMap is an array with a number of elements equal to 1 + the highest
+ * value in <code>srcMap</code>.
+ * <code>destMap</code> will be filled with the inverse mapping.
+ * If element with index i in <code>srcMap</code> has a value k different
+ * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
+ * the source array maps to element k in the destination array.
+ * The inverse map will have value i in its k-th element.
+ * For all elements of the destination array which do not map to
+ * an element in the source array, the corresponding element in the
+ * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
+ *
+ * @param length is the length of each array.
+ * @see UBIDI_MAP_NOWHERE
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
+
+/** option flags for ubidi_writeReordered() */
+
+/**
+ * option bit for ubidi_writeReordered():
+ * keep combining characters after their base characters in RTL runs
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_KEEP_BASE_COMBINING 1
+
+/**
+ * option bit for ubidi_writeReordered():
+ * replace characters with the "mirrored" property in RTL runs
+ * by their mirror-image mappings
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_DO_MIRRORING 2
+
+/**
+ * option bit for ubidi_writeReordered():
+ * surround the run with LRMs if necessary;
+ * this is part of the approximate "inverse Bidi" algorithm
+ *
+ * ubidi_writeReordered()
+ * first without this option, and then calling
+ * ubidi_writeReverse()
without mirroring.
+ * Doing this in the same step is faster and avoids a temporary buffer.
+ * An example for using this option is output to a character terminal that
+ * is designed for RTL scripts and stores text in reverse order.ubidi_setPara()
. This length may be different from the length
+ * of the source text if option #UBIDI_OPTION_STREAMING
+ * has been set.
+ *
+ * Note that whenever the length of the text affects the execution or the
+ * result of a function, it is the processed length which must be considered,
+ * except for ubidi_setPara (which receives unprocessed source
+ * text) and ubidi_getLength (which returns the original length
+ * of the source text).
+ * In particular, the processed length is the one to consider in the following
+ * cases:
+ * - maximum value of the limit argument of ubidi_setLine
+ * - maximum value of the charIndex argument of ubidi_getParagraph
+ * - maximum value of the charIndex argument of ubidi_getLevelAt
+ * - number of elements in the array returned by ubidi_getLevels
+ * - maximum value of the logicalStart argument of ubidi_getLogicalRun
+ * - maximum value of the logicalIndex argument of ubidi_getVisualIndex
+ * - number of elements filled in the *indexMap argument of ubidi_getLogicalMap
+ * - length of text processed by ubidi_writeReordered
+ *
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @return The length of the part of the source text processed by
+ * the last call to ubidi_setPara.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_STREAMING
+ * @stable ICU 3.6
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getProcessedLength(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the reordered text resulting from the last call to
+ * ubidi_setPara(). This length may be different from the length
+ * of the source text if option #UBIDI_OPTION_INSERT_MARKS
+ * or option #UBIDI_OPTION_REMOVE_CONTROLS has been set.
+ *
+ * This resulting length is the one to consider in the following cases:
+ * - maximum value of the visualIndex argument of ubidi_getLogicalIndex
+ * - number of elements of the *indexMap argument of ubidi_getVisualMap
+ *
+ * Note that this length stays identical to the source text length if
+ * Bidi marks are inserted or removed using option bits of
+ * ubidi_writeReordered, or if option
+ * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L has been set.
+ *
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @return The length of the reordered text resulting from
+ * the last call to ubidi_setPara.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_INSERT_MARKS
+ * @see UBIDI_OPTION_REMOVE_CONTROLS
+ * @stable ICU 3.6
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getResultLength(const UBiDi *pBiDi);
+
+U_CDECL_BEGIN
+
+/**
+ * Callback type declaration for overriding default Bidi class values with
+ * custom ones.
+ * Usually, the function pointer will be propagated to a UBiDi
+ * object by calling the ubidi_setClassCallback() function;
+ * then the callback will be invoked by the UBA implementation any time the
+ * class of a character is to be determined.
+ *
+ * @param context is a pointer to the callback private data.
+ *
+ * @param c is the code point to get a Bidi class for.
+ *
+ * @return The directional property / Bidi class for the given code point
+ * c if the default class has been overridden, or
+ * u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1
+ * if the standard Bidi class value for c is to be used.
+ * @see ubidi_setClassCallback
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+typedef UCharDirection U_CALLCONV
+UBiDiClassCallback(const void *context, UChar32 c);
+
+U_CDECL_END
+
+/**
+ * Retrieve the Bidi class for a given code point.
+ * If a #UBiDiClassCallback callback is defined and returns a
+ * value other than u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1,
+ * that value is used; otherwise the default class determination mechanism is invoked.
+ *
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @param c is the code point whose Bidi class must be retrieved.
+ *
+ * @return The Bidi class for character c based
+ * on the given pBiDi instance.
+ * @see UBiDiClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI UCharDirection U_EXPORT2
+ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
+
+/**
+ * Set the callback function and callback data used by the UBA
+ * implementation for Bidi class determination.
+ *
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @param newFn is the new callback function pointer.
+ *
+ * @param newContext is the new callback context pointer. This can be NULL.
+ *
+ * @param oldFn fillin: Returns the old callback function pointer. This can be
+ * NULL.
+ *
+ * @param oldContext fillin: Returns the old callback's context. This can be
+ * NULL.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
+ const void *newContext, UBiDiClassCallback **oldFn,
+ const void **oldContext, UErrorCode *pErrorCode);
+
+/**
+ * Get the current callback function used for Bidi class determination.
+ *
+ * @param pBiDi is the paragraph UBiDi object.
+ *
+ * @param fn fillin: Returns the callback function pointer.
+ *
+ * @param context fillin: Returns the callback's private context.
+ *
+ * @see ubidi_setClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
+
+/**
+ * Take a UBiDi object containing the reordering
+ * information for a piece of text (one or more paragraphs) set by
+ * ubidi_setPara() or for a line of text set by
+ * ubidi_setLine() and write a reordered string to the
+ * destination buffer.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters in RTL runs can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters; see the description of the destSize
+ * and options parameters and of the option bit flags.
+ *
+ * @param pBiDi A pointer to a UBiDi object that
+ * is set by ubidi_setPara() or
+ * ubidi_setLine() and contains the reordering
+ * information for the text that it was defined for,
+ * as well as a pointer to that text.
+ * The text was aliased (only the pointer was stored
+ * without copying the contents) and must not have been modified
+ * since the ubidi_setPara() call.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * The source text and dest[destSize]
+ * must not overlap.
+ *
+ * @param destSize The size of the dest buffer,
+ * in number of UChars.
+ * If the UBIDI_INSERT_LRM_FOR_NUMERIC
+ * option is set, then the destination length could be
+ * as large as
+ * ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi).
+ * If the UBIDI_REMOVE_BIDI_CONTROLS option
+ * is set, then the destination length may be less than
+ * ubidi_getLength(pBiDi).
+ * If none of these options is set, then the destination length
+ * will be exactly ubidi_getProcessedLength(pBiDi).
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * The options include mirroring the characters on a code
+ * point basis and inserting LRM characters, which is used
+ * especially for transforming visually stored text
+ * to logically stored text (although this is still an
+ * imperfect implementation of an "inverse Bidi" algorithm
+ * because it uses the "forward Bidi" algorithm at its core).
+ * The available options are:
+ * #UBIDI_DO_MIRRORING,
+ * #UBIDI_INSERT_LRM_FOR_NUMERIC,
+ * #UBIDI_KEEP_BASE_COMBINING,
+ * #UBIDI_OUTPUT_REVERSE,
+ * #UBIDI_REMOVE_BIDI_CONTROLS
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReordered(UBiDi *pBiDi,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Reverse a Right-To-Left run of Unicode text.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters.
+ *
+ * This function is the implementation for reversing RTL runs as part
+ * of ubidi_writeReordered(). For detailed descriptions
+ * of the parameters, see there.
+ * Since no Bidi controls are inserted here, the output string length
+ * will never exceed srcLength.
+ *
+ * @see ubidi_writeReordered
+ *
+ * @param src A pointer to the RTL run text.
+ *
+ * @param srcLength The length of the RTL run.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * src[srcLength] and dest[destSize]
+ * must not overlap.
+ *
+ * @param destSize The size of the dest buffer,
+ * in number of UChars.
+ * If the UBIDI_REMOVE_BIDI_CONTROLS option
+ * is set, then the destination length may be less than
+ * srcLength.
+ * If this option is not set, then the destination length
+ * will be exactly srcLength.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * See the options parameter in ubidi_writeReordered().
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReverse(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/*#define BIDI_SAMPLE_CODE*/
+/*@}*/
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/ubrk.h b/third_party/icu4c/ndk_headers/unicode/ubrk.h
new file mode 100644
index 00000000000..403f55bca1a
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/ubrk.h
@@ -0,0 +1,561 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1996-2015, International Business Machines Corporation and others.
+* All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef UBRK_H
+#define UBRK_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * A text-break iterator.
+ * For usage in C programs.
+ */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ /**
+ * Opaque type representing an ICU Break iterator object.
+ * @stable ICU 2.0
+ */
+ typedef struct UBreakIterator UBreakIterator;
+#endif
+
+#if !UCONFIG_NO_BREAK_ITERATION
+/**
+ * \file
+ * \brief C API: BreakIterator
+ *
+ * BreakIterator C API
+ *
+ * The BreakIterator C API defines methods for finding the location
+ * of boundaries in text. A UBreakIterator maintains a
+ * current position and scans over text, returning the index of characters
+ * where boundaries occur.
+ * Calendar C API
+ *
+ * UCalendar C API is used for converting between a UDate object
+ * and a set of integer fields such as UCAL_YEAR, UCAL_MONTH,
+ * UCAL_DATE, UCAL_HOUR, and so on.
+ * (A UDate object represents a specific instant in
+ * time with millisecond precision. See UDate
+ * for information about the UDate.)
+ *
+ * Types of UCalendar interpret a UDate
+ * according to the rules of a specific calendar system. The C API
+ * provides the enum UCalendarType with UCAL_TRADITIONAL and
+ * UCAL_GREGORIAN.
+ *
+ * Like other locale-sensitive C API, calendar API provides a
+ * function, ucal_open(), which returns a pointer to
+ * UCalendar whose time fields have been initialized
+ * with the current date and time. We need to specify the type of
+ * calendar to be opened and the timezoneId.
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ *
+ *
+ * \code
+ * UCalendar *caldef;
+ * UChar *tzId;
+ * UErrorCode status = U_ZERO_ERROR;
+ * tzId=(UChar*)malloc(sizeof(UChar) * (strlen("PST") +1) );
+ * u_uastrcpy(tzId, "PST");
+ * caldef=ucal_open(tzId, u_strlen(tzId), NULL, UCAL_TRADITIONAL, &status);
+ * \endcode
+ *
+ * \htmlonly\endhtmlonly
+ *
+ * A UCalendar object can produce all the time field values
+ * needed to implement the date-time formatting for a particular language
+ * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional).
+ *
+ * When computing a UDate from time fields, two special circumstances
+ * may arise: there may be insufficient information to compute the
+ * UDate (such as only year and month but no day in the month),
+ * or there may be inconsistent information (such as "Tuesday, July 15, 1996"
+ * -- July 15, 1996 is actually a Monday).
+ *
+ * \endhtmlonly
+ *
\endhtmlonly
+ *
+ * For the time of day:
+ *
+ * \htmlonly
+ * \code
+ * UCAL_MONTH + UCAL_DAY_OF_MONTH
+ * UCAL_MONTH + UCAL_WEEK_OF_MONTH + UCAL_DAY_OF_WEEK
+ * UCAL_MONTH + UCAL_DAY_OF_WEEK_IN_MONTH + UCAL_DAY_OF_WEEK
+ * UCAL_DAY_OF_YEAR
+ * UCAL_DAY_OF_WEEK + UCAL_WEEK_OF_YEAR
+ * \endcode
+ *
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ *
+ *
+ * \code
+ * UCAL_HOUR_OF_DAY
+ * UCAL_AM_PM + UCAL_HOUR
+ * \endcode
+ *
+ * \htmlonly
+ *
+ *
+ * Calendar provides an API for field "rolling", where fields
+ * can be incremented or decremented, but wrap around. For example, rolling the
+ * month up in the date December 12, 1996 results in
+ * January 12, 1996.
+ *
+ * Calendar also provides a date arithmetic function for
+ * adding the specified (signed) amount of time to a particular time field.
+ * For example, subtracting 5 days from the date September 12, 1996
+ * results in September 7, 1996.
+ *
+ *
+ *
+ *
+ * @stable ICU 2.0
+ */
+
+/**
+ * The time zone ID reserved for unknown time zone.
+ * It behaves like the GMT/UTC time zone but has the special ID "Etc/Unknown".
+ * @stable ICU 4.8
+ */
+#define UCAL_UNKNOWN_ZONE_ID "Etc/Unknown"
+
+/** A calendar.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef void* UCalendar;
+
+/** Possible types of UCalendars
+ * @stable ICU 2.0
+ */
+enum UCalendarType {
+ /**
+ * Despite the name, UCAL_TRADITIONAL designates the locale's default calendar,
+ * which may be the Gregorian calendar or some other calendar.
+ * @stable ICU 2.0
+ */
+ UCAL_TRADITIONAL,
+ /**
+ * A better name for UCAL_TRADITIONAL.
+ * @stable ICU 4.2
+ */
+ UCAL_DEFAULT = UCAL_TRADITIONAL,
+ /**
+ * Unambiguously designates the Gregorian calendar for the locale.
+ * @stable ICU 2.0
+ */
+ UCAL_GREGORIAN
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarType UCalendarType;
+
+/** Possible fields in a UCalendar
+ * @stable ICU 2.0
+ */
+enum UCalendarDateFields {
+ /**
+ * Field number indicating the era, e.g., AD or BC in the Gregorian (Julian) calendar.
+ * This is a calendar-specific value.
+ * @stable ICU 2.6
+ */
+ UCAL_ERA,
+
+ /**
+ * Field number indicating the year. This is a calendar-specific value.
+ * @stable ICU 2.6
+ */
+ UCAL_YEAR,
+
+ /**
+ * Field number indicating the month. This is a calendar-specific value.
+ * The first month of the year is
+ * UCAL_JANUARY; the last depends on the number of months in a year.
+ * @see #UCAL_JANUARY
+ * @see #UCAL_FEBRUARY
+ * @see #UCAL_MARCH
+ * @see #UCAL_APRIL
+ * @see #UCAL_MAY
+ * @see #UCAL_JUNE
+ * @see #UCAL_JULY
+ * @see #UCAL_AUGUST
+ * @see #UCAL_SEPTEMBER
+ * @see #UCAL_OCTOBER
+ * @see #UCAL_NOVEMBER
+ * @see #UCAL_DECEMBER
+ * @see #UCAL_UNDECIMBER
+ * @stable ICU 2.6
+ */
+ UCAL_MONTH,
+
+ /**
+ * Field number indicating the
+ * week number within the current year. The first week of the year, as
+ * defined by UCAL_FIRST_DAY_OF_WEEK and UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
+ * attributes, has value 1. Subclasses define
+ * the value of UCAL_WEEK_OF_YEAR for days before the first week of
+ * the year.
+ * @see ucal_getAttribute
+ * @see ucal_setAttribute
+ * @stable ICU 2.6
+ */
+ UCAL_WEEK_OF_YEAR,
+
+ /**
+ * Field number indicating the
+ * week number within the current month. The first week of the month, as
+ * defined by UCAL_FIRST_DAY_OF_WEEK and UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
+ * attributes, has value 1. Subclasses define
+ * the value of UCAL_WEEK_OF_MONTH for days before the first week of
+ * the month.
+ * @see ucal_getAttribute
+ * @see ucal_setAttribute
+ * @see #UCAL_FIRST_DAY_OF_WEEK
+ * @see #UCAL_MINIMAL_DAYS_IN_FIRST_WEEK
+ * @stable ICU 2.6
+ */
+ UCAL_WEEK_OF_MONTH,
+
+ /**
+ * Field number indicating the
+ * day of the month. This is a synonym for UCAL_DAY_OF_MONTH.
+ * The first day of the month has value 1.
+ * @see #UCAL_DAY_OF_MONTH
+ * @stable ICU 2.6
+ */
+ UCAL_DATE,
+
+ /**
+ * Field number indicating the day
+ * number within the current year. The first day of the year has value 1.
+ * @stable ICU 2.6
+ */
+ UCAL_DAY_OF_YEAR,
+
+ /**
+ * Field number indicating the day
+ * of the week. This field takes values UCAL_SUNDAY,
+ * UCAL_MONDAY, UCAL_TUESDAY, UCAL_WEDNESDAY,
+ * UCAL_THURSDAY, UCAL_FRIDAY, and UCAL_SATURDAY.
+ * @see #UCAL_SUNDAY
+ * @see #UCAL_MONDAY
+ * @see #UCAL_TUESDAY
+ * @see #UCAL_WEDNESDAY
+ * @see #UCAL_THURSDAY
+ * @see #UCAL_FRIDAY
+ * @see #UCAL_SATURDAY
+ * @stable ICU 2.6
+ */
+ UCAL_DAY_OF_WEEK,
+
+ /**
+ * Field number indicating the
+ * ordinal number of the day of the week within the current month. Together
+ * with the UCAL_DAY_OF_WEEK field, this uniquely specifies a day
+ * within a month. Unlike UCAL_WEEK_OF_MONTH and
+ * UCAL_WEEK_OF_YEAR, this field's value does not depend on
+ * the UCAL_FIRST_DAY_OF_WEEK or
+ * UCAL_MINIMAL_DAYS_IN_FIRST_WEEK attributes. UCAL_DAY_OF_MONTH 1
+ * through 7 always correspond to UCAL_DAY_OF_WEEK_IN_MONTH
+ * 1; 8 through 14 correspond to
+ * UCAL_DAY_OF_WEEK_IN_MONTH 2, and so on.
+ * UCAL_DAY_OF_WEEK_IN_MONTH 0 indicates the week before
+ * UCAL_DAY_OF_WEEK_IN_MONTH 1. Negative values count back from the
+ * end of the month, so the last Sunday of a month is specified as
+ * UCAL_DAY_OF_WEEK = UCAL_SUNDAY, UCAL_DAY_OF_WEEK_IN_MONTH = -1. Because
+ * negative values count backward they will usually be aligned differently
+ * within the month than positive values. For example, if a month has 31
+ * days, UCAL_DAY_OF_WEEK_IN_MONTH -1 will overlap
+ * UCAL_DAY_OF_WEEK_IN_MONTH 5 and the end of 4.
+ * @see #UCAL_DAY_OF_WEEK
+ * @see #UCAL_WEEK_OF_MONTH
+ * @stable ICU 2.6
+ */
+ UCAL_DAY_OF_WEEK_IN_MONTH,
+
+ /**
+ * Field number indicating
+ * whether the UCAL_HOUR is before or after noon.
+ * E.g., at 10:04:15.250 PM the UCAL_AM_PM is UCAL_PM.
+ * @see #UCAL_AM
+ * @see #UCAL_PM
+ * @see #UCAL_HOUR
+ * @stable ICU 2.6
+ */
+ UCAL_AM_PM,
+
+ /**
+ * Field number indicating the
+ * hour of the morning or afternoon. UCAL_HOUR is used for the 12-hour
+ * clock.
+ * E.g., at 10:04:15.250 PM the UCAL_HOUR is 10.
+ * @see #UCAL_AM_PM
+ * @see #UCAL_HOUR_OF_DAY
+ * @stable ICU 2.6
+ */
+ UCAL_HOUR,
+
+ /**
+ * Field number indicating the
+ * hour of the day. UCAL_HOUR_OF_DAY is used for the 24-hour clock.
+ * E.g., at 10:04:15.250 PM the UCAL_HOUR_OF_DAY is 22.
+ * @see #UCAL_HOUR
+ * @stable ICU 2.6
+ */
+ UCAL_HOUR_OF_DAY,
+
+ /**
+ * Field number indicating the
+ * minute within the hour.
+ * E.g., at 10:04:15.250 PM the UCAL_MINUTE is 4.
+ * @stable ICU 2.6
+ */
+ UCAL_MINUTE,
+
+ /**
+ * Field number indicating the
+ * second within the minute.
+ * E.g., at 10:04:15.250 PM the UCAL_SECOND is 15.
+ * @stable ICU 2.6
+ */
+ UCAL_SECOND,
+
+ /**
+ * Field number indicating the
+ * millisecond within the second.
+ * E.g., at 10:04:15.250 PM the UCAL_MILLISECOND is 250.
+ * @stable ICU 2.6
+ */
+ UCAL_MILLISECOND,
+
+ /**
+ * Field number indicating the
+ * raw offset from GMT in milliseconds.
+ * @stable ICU 2.6
+ */
+ UCAL_ZONE_OFFSET,
+
+ /**
+ * Field number indicating the
+ * daylight savings offset in milliseconds.
+ * @stable ICU 2.6
+ */
+ UCAL_DST_OFFSET,
+
+ /**
+ * Field number
+ * indicating the extended year corresponding to the
+ * UCAL_WEEK_OF_YEAR field. This may be one greater or less
+ * than the value of UCAL_EXTENDED_YEAR.
+ * @stable ICU 2.6
+ */
+ UCAL_YEAR_WOY,
+
+ /**
+ * Field number
+ * indicating the localized day of week. This will be a value from 1
+ * to 7 inclusive, with 1 being the localized first day of the week.
+ * @stable ICU 2.6
+ */
+ UCAL_DOW_LOCAL,
+
+ /**
+ * Year of this calendar system, encompassing all supra-year fields. For example,
+ * in Gregorian/Julian calendars, positive Extended Year values indicate years AD,
+ * 1 BC = 0 extended, 2 BC = -1 extended, and so on.
+ * @stable ICU 2.8
+ */
+ UCAL_EXTENDED_YEAR,
+
+ /**
+ * Field number
+ * indicating the modified Julian day number. This is different from
+ * the conventional Julian day number in two regards. First, it
+ * demarcates days at local zone midnight, rather than noon GMT.
+ * Second, it is a local number; that is, it depends on the local time
+ * zone. It can be thought of as a single number that encompasses all
+ * the date-related fields.
+ * @stable ICU 2.8
+ */
+ UCAL_JULIAN_DAY,
+
+ /**
+ * Ranges from 0 to 23:59:59.999 (regardless of DST). This field behaves exactly
+ * like a composite of all time-related fields, not including the zone fields. As such,
+ * it also reflects discontinuities of those fields on DST transition days. On a day
+ * of DST onset, it will jump forward. On a day of DST cessation, it will jump
+ * backward. This reflects the fact that it must be combined with the UCAL_DST_OFFSET field
+ * to obtain a unique local time value.
+ * @stable ICU 2.8
+ */
+ UCAL_MILLISECONDS_IN_DAY,
+
+ /**
+ * Whether or not the current month is a leap month (0 or 1). See the Chinese calendar for
+ * an example of this.
+ */
+ UCAL_IS_LEAP_MONTH,
+
+ /**
+ * Field number indicating the
+ * day of the month. This is a synonym for UCAL_DATE.
+ * The first day of the month has value 1.
+ * @see #UCAL_DATE
+ * Synonym for UCAL_DATE
+ * @stable ICU 2.8
+ **/
+ UCAL_DAY_OF_MONTH=UCAL_DATE
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarDateFields UCalendarDateFields;
+ /**
+ * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients
+ * who create locale resources for the field of first-day-of-week should be aware of
+ * this. For instance, in US locale, first-day-of-week is set to 1, i.e., UCAL_SUNDAY.
+ */
+/** Possible days of the week in a UCalendar
+ * @stable ICU 2.0
+ */
+enum UCalendarDaysOfWeek {
+ /** Sunday */
+ UCAL_SUNDAY = 1,
+ /** Monday */
+ UCAL_MONDAY,
+ /** Tuesday */
+ UCAL_TUESDAY,
+ /** Wednesday */
+ UCAL_WEDNESDAY,
+ /** Thursday */
+ UCAL_THURSDAY,
+ /** Friday */
+ UCAL_FRIDAY,
+ /** Saturday */
+ UCAL_SATURDAY
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarDaysOfWeek UCalendarDaysOfWeek;
+
+/** Possible months in a UCalendar. Note: Calendar month is 0-based.
+ * @stable ICU 2.0
+ */
+enum UCalendarMonths {
+ /** January */
+ UCAL_JANUARY,
+ /** February */
+ UCAL_FEBRUARY,
+ /** March */
+ UCAL_MARCH,
+ /** April */
+ UCAL_APRIL,
+ /** May */
+ UCAL_MAY,
+ /** June */
+ UCAL_JUNE,
+ /** July */
+ UCAL_JULY,
+ /** August */
+ UCAL_AUGUST,
+ /** September */
+ UCAL_SEPTEMBER,
+ /** October */
+ UCAL_OCTOBER,
+ /** November */
+ UCAL_NOVEMBER,
+ /** December */
+ UCAL_DECEMBER,
+ /** Value of the UCAL_MONTH field indicating the
+ * thirteenth month of the year. Although the Gregorian calendar
+ * does not use this value, lunar calendars do.
+ */
+ UCAL_UNDECIMBER
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarMonths UCalendarMonths;
+
+/** Possible AM/PM values in a UCalendar
+ * @stable ICU 2.0
+ */
+enum UCalendarAMPMs {
+ /** AM */
+ UCAL_AM,
+ /** PM */
+ UCAL_PM
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarAMPMs UCalendarAMPMs;
+
+/**
+ * System time zone type constants used by filtering zones
+ * in ucal_openTimeZoneIDEnumeration.
+ * @see ucal_openTimeZoneIDEnumeration
+ * @stable ICU 4.8
+ */
+enum USystemTimeZoneType {
+ /**
+ * Any system zones.
+ * @stable ICU 4.8
+ */
+ UCAL_ZONE_TYPE_ANY,
+ /**
+ * Canonical system zones.
+ * @stable ICU 4.8
+ */
+ UCAL_ZONE_TYPE_CANONICAL,
+ /**
+ * Canonical system zones associated with actual locations.
+ * @stable ICU 4.8
+ */
+ UCAL_ZONE_TYPE_CANONICAL_LOCATION
+};
+
+/** @stable ICU 4.8 */
+typedef enum USystemTimeZoneType USystemTimeZoneType;
+
+/**
+ * Create an enumeration over system time zone IDs with the given
+ * filter conditions.
+ * @param zoneType The system time zone type.
+ * @param region The ISO 3166 two-letter country code or UN M.49
+ * three-digit area code. When NULL, no filtering
+ * done by region.
+ * @param rawOffset An offset from GMT in milliseconds, ignoring the
+ * effect of daylight savings time, if any. When NULL,
+ * no filtering done by zone offset.
+ * @param ec A pointer to an UErrorCode to receive any errors
+ * @return an enumeration object that the caller must dispose of
+ * using uenum_close(), or NULL upon failure. In case of failure,
+ * *ec will indicate the error.
+ * @stable ICU 4.8
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucal_openTimeZoneIDEnumeration(USystemTimeZoneType zoneType, const char* region,
+ const int32_t* rawOffset, UErrorCode* ec);
+
+/**
+ * Create an enumeration over all time zones.
+ *
+ * @param ec input/output error code
+ *
+ * @return an enumeration object that the caller must dispose of using
+ * uenum_close(), or NULL upon failure. In case of failure *ec will
+ * indicate the error.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucal_openTimeZones(UErrorCode* ec);
+
+/**
+ * Create an enumeration over all time zones associated with the given
+ * country. Some zones are affiliated with no country (e.g., "UTC");
+ * these may also be retrieved, as a group.
+ *
+ * @param country the ISO 3166 two-letter country code, or NULL to
+ * retrieve zones not affiliated with any country
+ *
+ * @param ec input/output error code
+ *
+ * @return an enumeration object that the caller must dispose of using
+ * uenum_close(), or NULL upon failure. In case of failure *ec will
+ * indicate the error.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucal_openCountryTimeZones(const char* country, UErrorCode* ec);
+
+/**
+ * Return the default time zone. The default is determined initially
+ * by querying the host operating system. If the host system detection
+ * routines fail, or if they specify a TimeZone or TimeZone offset
+ * which is not recognized, then the special TimeZone "Etc/Unknown"
+ * is returned.
+ *
+ * The default may be changed with `ucal_setDefaultTimeZone()` or with
+ * the C++ TimeZone API, `TimeZone::adoptDefault(TimeZone*)`.
+ *
+ * @param result A buffer to receive the result, or NULL
+ *
+ * @param resultCapacity The capacity of the result buffer
+ *
+ * @param ec input/output error code
+ *
+ * @return The result string length, not including the terminating
+ * null
+ *
+ * @see #UCAL_UNKNOWN_ZONE_ID
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec);
+
+/**
+ * Set the default time zone.
+ *
+ * @param zoneID null-terminated time zone ID
+ *
+ * @param ec input/output error code
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+ucal_setDefaultTimeZone(const UChar* zoneID, UErrorCode* ec);
+
+/**
+ * Return the current host time zone. The host time zone is detected from
+ * the current host system configuration by querying the host operating
+ * system. If the host system detection routines fail, or if they specify
+ * a TimeZone or TimeZone offset which is not recognized, then the special
+ * TimeZone "Etc/Unknown" is returned.
+ *
+ * Note that host time zone and the ICU default time zone can be different.
+ *
+ * The ICU default time zone does not change once initialized unless modified
+ * by calling `ucal_setDefaultTimeZone()` or with the C++ TimeZone API,
+ * `TimeZone::adoptDefault(TimeZone*)`.
+ *
+ * If the host operating system configuration has changed since ICU has
+ * initialized then the returned value can be different than the ICU default
+ * time zone, even if the default has not changed.
+ *
+ * UCAL_UNKNOWN_ZONE_ID
("Etc/Unknown") without any errors/warnings. If you want
+ * to check if a TimeZone ID is valid prior to this function, use ucal_getCanonicalTimeZoneID
.
+ *
+ * @param zoneID The desired TimeZone ID. If 0, use the default time zone.
+ * @param len The length of zoneID, or -1 if null-terminated.
+ * @param locale The desired locale
+ * @param type The type of UCalendar to open. This can be UCAL_GREGORIAN to open the Gregorian
+ * calendar for the locale, or UCAL_DEFAULT to open the default calendar for the locale (the
+ * default calendar may also be Gregorian). To open a specific non-Gregorian calendar for the
+ * locale, use uloc_setKeywordValue to set the value of the calendar keyword for the locale
+ * and then pass the locale to ucal_open with UCAL_DEFAULT as the type.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return A pointer to a UCalendar, or 0 if an error occurred.
+ * @see #UCAL_UNKNOWN_ZONE_ID
+ * @stable ICU 2.0
+ */
+U_CAPI UCalendar* U_EXPORT2
+ucal_open(const UChar* zoneID,
+ int32_t len,
+ const char* locale,
+ UCalendarType type,
+ UErrorCode* status);
+
+/**
+ * Close a UCalendar.
+ * Once closed, a UCalendar may no longer be used.
+ * @param cal The UCalendar to close.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_close(UCalendar *cal);
+
+/**
+ * Open a copy of a UCalendar.
+ * This function performs a deep copy.
+ * @param cal The calendar to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UCalendar identical to cal.
+ * @stable ICU 4.0
+ */
+U_CAPI UCalendar* U_EXPORT2
+ucal_clone(const UCalendar* cal,
+ UErrorCode* status);
+
+/**
+ * Set the TimeZone used by a UCalendar.
+ * A UCalendar uses a timezone for converting from Greenwich time to local time.
+ * @param cal The UCalendar to set.
+ * @param zoneID The desired TimeZone ID. If 0, use the default time zone.
+ * @param len The length of zoneID, or -1 if null-terminated.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_setTimeZone(UCalendar* cal,
+ const UChar* zoneID,
+ int32_t len,
+ UErrorCode* status);
+
+/**
+ * Get the ID of the UCalendar's time zone.
+ *
+ * @param cal The UCalendar to query.
+ * @param result Receives the UCalendar's time zone ID.
+ * @param resultLength The maximum size of result.
+ * @param status Receives the status.
+ * @return The total buffer size needed; if greater than resultLength, the output was truncated.
+ * @stable ICU 51
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getTimeZoneID(const UCalendar *cal,
+ UChar *result,
+ int32_t resultLength,
+ UErrorCode *status);
+
+/**
+ * Possible formats for a UCalendar's time zone display name (see ucal_getTimeZoneDisplayName)
+ * @stable ICU 2.0
+ */
+enum UCalendarDisplayNameType {
+ /** Standard time display name */
+ UCAL_STANDARD,
+ /** Short standard time display name */
+ UCAL_SHORT_STANDARD,
+ /** Daylight saving time display name */
+ UCAL_DST,
+ /** Short daylight saving time display name */
+ UCAL_SHORT_DST
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarDisplayNameType UCalendarDisplayNameType;
+
+/**
+ * Get the display name for a UCalendar's TimeZone.
+ * A display name is suitable for presentation to a user.
+ * @param cal The UCalendar to query.
+ * @param type The desired display name format; one of UCAL_STANDARD, UCAL_SHORT_STANDARD,
+ * UCAL_DST, UCAL_SHORT_DST
+ * @param locale The desired locale for the display name.
+ * @param result A pointer to a buffer to receive the time zone display name.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The total buffer size needed; if greater than resultLength, the output was truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getTimeZoneDisplayName(const UCalendar* cal,
+ UCalendarDisplayNameType type,
+ const char* locale,
+ UChar* result,
+ int32_t resultLength,
+ UErrorCode* status);
+
+/**
+ * Determine if a UCalendar is currently in daylight savings time.
+ * Daylight savings time is not used in all parts of the world.
+ * @param cal The UCalendar to query.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return true if cal is currently in daylight savings time, false otherwise
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucal_inDaylightTime(const UCalendar* cal,
+ UErrorCode* status );
+
+/**
+ * Sets the GregorianCalendar change date. This is the point when the switch from
+ * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
+ * 15, 1582. Dates before this point are treated as Julian dates.
+ *
+ * This function works only for Gregorian calendars. If the UCalendar is not
+ * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
+ * error code is set.
+ *
+ * @param cal The calendar object.
+ * @param date The given Gregorian cutover date.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ *
+ * @see GregorianCalendar::setGregorianChange
+ * @see ucal_getGregorianChange
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode);
+
+/**
+ * Gets the Gregorian Calendar change date. This is the point when the switch from
+ * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October
+ * 15, 1582. Dates before this point are treated as Julian dates.
+ *
+ * This function works only for Gregorian calendars. If the UCalendar is not
+ * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR
+ * error code is set.
+ *
+ * @param cal The calendar object.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The Gregorian cutover time for this calendar.
+ *
+ * @see GregorianCalendar::getGregorianChange
+ * @see ucal_setGregorianChange
+ * @stable ICU 3.6
+ */
+U_CAPI UDate U_EXPORT2
+ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode);
+
+/**
+ * Types of UCalendar attributes, used with ucal_getAttribute() and ucal_setAttribute()
+ * @stable ICU 2.0
+ */
+enum UCalendarAttribute {
+ /**
+ * Lenient parsing
+ * @stable ICU 2.0
+ */
+ UCAL_LENIENT,
+ /**
+ * First day of week
+ * @stable ICU 2.0
+ */
+ UCAL_FIRST_DAY_OF_WEEK,
+ /**
+ * Minimum number of days in first week
+ * @stable ICU 2.0
+ */
+ UCAL_MINIMAL_DAYS_IN_FIRST_WEEK,
+ /**
+ * The behavior for handling wall time repeating multiple times
+ * at negative time zone offset transitions (see UCalendarWallTimeOption).
+ * @stable ICU 49
+ */
+ UCAL_REPEATED_WALL_TIME,
+ /**
+ * The behavior for handling skipped wall time at positive time
+ * zone offset transitions (see UCalendarWallTimeOption).
+ * @stable ICU 49
+ */
+ UCAL_SKIPPED_WALL_TIME
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarAttribute UCalendarAttribute;
+
+/**
+ * Options for handling ambiguous wall time at time zone offset transitions;
+ * used as values of the UCAL_REPEATED_WALL_TIME and UCAL_SKIPPED_WALL_TIME attributes.
+ * @stable ICU 49
+ */
+enum UCalendarWallTimeOption {
+ /**
+ * An ambiguous wall time to be interpreted as the latest.
+ * This option is valid for UCAL_REPEATED_WALL_TIME and
+ * UCAL_SKIPPED_WALL_TIME.
+ * @stable ICU 49
+ */
+ UCAL_WALLTIME_LAST,
+ /**
+ * An ambiguous wall time to be interpreted as the earliest.
+ * This option is valid for UCAL_REPEATED_WALL_TIME and
+ * UCAL_SKIPPED_WALL_TIME.
+ * @stable ICU 49
+ */
+ UCAL_WALLTIME_FIRST,
+ /**
+ * An ambiguous wall time to be interpreted as the next valid
+ * wall time. This option is valid only for UCAL_SKIPPED_WALL_TIME.
+ * @stable ICU 49
+ */
+ UCAL_WALLTIME_NEXT_VALID
+};
+/** @stable ICU 49 */
+typedef enum UCalendarWallTimeOption UCalendarWallTimeOption;
+
+/**
+ * Get a numeric attribute associated with a UCalendar.
+ * Numeric attributes include the first day of the week, or the minimal numbers
+ * of days in the first week of the month.
+ * @param cal The UCalendar to query.
+ * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
+ * UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, UCAL_REPEATED_WALL_TIME or UCAL_SKIPPED_WALL_TIME
+ * @return The value of attr.
+ * @see ucal_setAttribute
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getAttribute(const UCalendar* cal,
+ UCalendarAttribute attr);
+
+/**
+ * Set a numeric attribute associated with a UCalendar.
+ * Numeric attributes include the first day of the week, or the minimal numbers
+ * of days in the first week of the month.
+ * @param cal The UCalendar to set.
+ * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK,
+ * UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, UCAL_REPEATED_WALL_TIME or UCAL_SKIPPED_WALL_TIME
+ * @param newValue The new value of attr.
+ * @see ucal_getAttribute
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_setAttribute(UCalendar* cal,
+ UCalendarAttribute attr,
+ int32_t newValue);
+
+/**
+ * Get a locale for which calendars are available.
+ * A UCalendar in a locale returned by this function will contain the correct
+ * day and month names for the locale.
+ * @param localeIndex The index of the desired locale.
+ * @return A locale for which calendars are available, or 0 if none.
+ * @see ucal_countAvailable
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+ucal_getAvailable(int32_t localeIndex);
+
+/**
+ * Determine how many locales have calendars available.
+ * This function is most useful for determining the loop ending condition for
+ * calls to \ref ucal_getAvailable.
+ * @return The number of locales for which calendars are available.
+ * @see ucal_getAvailable
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_countAvailable(void);
+
+/**
+ * Get a UCalendar's current time in millis.
+ * The time is represented as milliseconds from the epoch.
+ * @param cal The UCalendar to query.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The calendar's current time in millis.
+ * @see ucal_setMillis
+ * @see ucal_setDate
+ * @see ucal_setDateTime
+ * @stable ICU 2.0
+ */
+U_CAPI UDate U_EXPORT2
+ucal_getMillis(const UCalendar* cal,
+ UErrorCode* status);
+
+/**
+ * Set a UCalendar's current time in millis.
+ * The time is represented as milliseconds from the epoch.
+ * @param cal The UCalendar to set.
+ * @param dateTime The desired date and time.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_getMillis
+ * @see ucal_setDate
+ * @see ucal_setDateTime
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_setMillis(UCalendar* cal,
+ UDate dateTime,
+ UErrorCode* status );
+
+/**
+ * Set a UCalendar's current date.
+ * The date is represented as a series of 32-bit integers.
+ * @param cal The UCalendar to set.
+ * @param year The desired year.
+ * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
+ * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
+ * @param date The desired day of the month.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_getMillis
+ * @see ucal_setMillis
+ * @see ucal_setDateTime
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_setDate(UCalendar* cal,
+ int32_t year,
+ int32_t month,
+ int32_t date,
+ UErrorCode* status);
+
+/**
+ * Set a UCalendar's current date and time.
+ * The date is represented as a series of 32-bit integers.
+ * @param cal The UCalendar to set.
+ * @param year The desired year.
+ * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY,
+ * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER
+ * @param date The desired day of the month.
+ * @param hour The desired hour of day.
+ * @param minute The desired minute.
+ * @param second The desired second.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_getMillis
+ * @see ucal_setMillis
+ * @see ucal_setDate
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_setDateTime(UCalendar* cal,
+ int32_t year,
+ int32_t month,
+ int32_t date,
+ int32_t hour,
+ int32_t minute,
+ int32_t second,
+ UErrorCode* status);
+
+/**
+ * Returns true if two UCalendars are equivalent. Equivalent
+ * UCalendars will behave identically, but they may be set to
+ * different times.
+ * @param cal1 The first of the UCalendars to compare.
+ * @param cal2 The second of the UCalendars to compare.
+ * @return true if cal1 and cal2 are equivalent, false otherwise.
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucal_equivalentTo(const UCalendar* cal1,
+ const UCalendar* cal2);
+
+/**
+ * Add a specified signed amount to a particular field in a UCalendar.
+ * This can modify more significant fields in the calendar.
+ * Adding a positive value always means moving forward in time, so for the Gregorian calendar,
+ * starting with 100 BC and adding +1 to year results in 99 BC (even though this actually reduces
+ * the numeric value of the field itself).
+ * @param cal The UCalendar to which to add.
+ * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param amount The signed amount to add to field. If the amount causes the value
+ * to exceed the maximum or minimum values for that field, other fields are modified
+ * to preserve the magnitude of the change.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_roll
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_add(UCalendar* cal,
+ UCalendarDateFields field,
+ int32_t amount,
+ UErrorCode* status);
+
+/**
+ * Add a specified signed amount to a particular field in a UCalendar.
+ * This will not modify more significant fields in the calendar.
+ * Rolling by a positive value always means moving forward in time (unless the limit of the
+ * field is reached, in which case it may pin or wrap), so for Gregorian calendar,
+ * starting with 100 BC and rolling the year by +1 results in 99 BC.
+ * When eras have a definite beginning and end (as in the Chinese calendar, or as in most eras in the
+ * Japanese calendar) then rolling the year past either limit of the era will cause the year to wrap around.
+ * When eras only have a limit at one end, then attempting to roll the year past that limit will result in
+ * pinning the year at that limit. Note that for most calendars in which era 0 years move forward in time
+ * (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to result in negative years for
+ * era 0 (that is the only way to represent years before the calendar epoch).
+ * @param cal The UCalendar to which to add.
+ * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param amount The signed amount to add to field. If the amount causes the value
+ * to exceed the maximum or minimum values for that field, the field is pinned to a permissible
+ * value.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @see ucal_add
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_roll(UCalendar* cal,
+ UCalendarDateFields field,
+ int32_t amount,
+ UErrorCode* status);
+
+/**
+ * Get the current value of a field from a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar to query.
+ * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The value of the desired field.
+ * @see ucal_set
+ * @see ucal_isSet
+ * @see ucal_clearField
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_get(const UCalendar* cal,
+ UCalendarDateFields field,
+ UErrorCode* status );
+
+/**
+ * Set the value of a field in a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar to set.
+ * @param field The field to set; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param value The desired value of field.
+ * @see ucal_get
+ * @see ucal_isSet
+ * @see ucal_clearField
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_set(UCalendar* cal,
+ UCalendarDateFields field,
+ int32_t value);
+
+/**
+ * Determine if a field in a UCalendar is set.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar to query.
+ * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @return true if field is set, false otherwise.
+ * @see ucal_get
+ * @see ucal_set
+ * @see ucal_clearField
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucal_isSet(const UCalendar* cal,
+ UCalendarDateFields field);
+
+/**
+ * Clear a field in a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param cal The UCalendar containing the field to clear.
+ * @param field The field to clear; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @see ucal_get
+ * @see ucal_set
+ * @see ucal_isSet
+ * @see ucal_clear
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_clearField(UCalendar* cal,
+ UCalendarDateFields field);
+
+/**
+ * Clear all fields in a UCalendar.
+ * All fields are represented as 32-bit integers.
+ * @param calendar The UCalendar to clear.
+ * @see ucal_get
+ * @see ucal_set
+ * @see ucal_isSet
+ * @see ucal_clearField
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucal_clear(UCalendar* calendar);
+
+/**
+ * Possible limit types for a UCalendar field, as accepted by ucal_getLimit()
+ * @stable ICU 2.0
+ */
+enum UCalendarLimitType {
+ /** Minimum value */
+ UCAL_MINIMUM,
+ /** Maximum value */
+ UCAL_MAXIMUM,
+ /** Greatest minimum value */
+ UCAL_GREATEST_MINIMUM,
+ /** Least maximum value */
+ UCAL_LEAST_MAXIMUM,
+ /** Actual minimum value */
+ UCAL_ACTUAL_MINIMUM,
+ /** Actual maximum value */
+ UCAL_ACTUAL_MAXIMUM
+};
+
+/** @stable ICU 2.0 */
+typedef enum UCalendarLimitType UCalendarLimitType;
+
+/**
+ * Determine a limit for a field in a UCalendar.
+ * A limit is a maximum or minimum value for a field.
+ * @param cal The UCalendar to query.
+ * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param type The desired critical point; one of UCAL_MINIMUM, UCAL_MAXIMUM, UCAL_GREATEST_MINIMUM,
+ * UCAL_LEAST_MAXIMUM, UCAL_ACTUAL_MINIMUM, UCAL_ACTUAL_MAXIMUM
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The requested value.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getLimit(const UCalendar* cal,
+ UCalendarDateFields field,
+ UCalendarLimitType type,
+ UErrorCode* status);
+
+/** Get the locale for this calendar object. You can choose between valid and actual locale.
+ * @param cal The calendar object
+ * @param type type of the locale we're looking for (valid or actual)
+ * @param status error code for the operation
+ * @return the locale name
+ * @stable ICU 2.8
+ */
+U_CAPI const char * U_EXPORT2
+ucal_getLocaleByType(const UCalendar *cal, ULocDataLocaleType type, UErrorCode* status);
+
+/**
+ * Returns the timezone data version currently used by ICU.
+ * @param status error code for the operation
+ * @return the version string, such as "2007f"
+ * @stable ICU 3.8
+ */
+U_CAPI const char * U_EXPORT2
+ucal_getTZDataVersion(UErrorCode* status);
+
+/**
+ * Returns the canonical system timezone ID or the normalized
+ * custom time zone ID for the given time zone ID.
+ * @param id The input timezone ID to be canonicalized.
+ * @param len The length of id, or -1 if null-terminated.
+ * @param result The buffer receives the canonical system timezone ID
+ * or the custom timezone ID in normalized format.
+ * @param resultCapacity The capacity of the result buffer.
+ * @param isSystemID Receives if the given ID is a known system
+ * timezone ID.
+ * @param status Receives the status. When the given timezone ID
+ * is neither a known system time zone ID nor a
+ * valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR
+ * is set.
+ * @return The result string length, not including the terminating
+ * null.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getCanonicalTimeZoneID(const UChar* id, int32_t len,
+ UChar* result, int32_t resultCapacity, UBool *isSystemID, UErrorCode* status);
+/**
+ * Get the resource keyword value string designating the calendar type for the UCalendar.
+ * @param cal The UCalendar to query.
+ * @param status The error code for the operation.
+ * @return The resource keyword value string.
+ * @stable ICU 4.2
+ */
+U_CAPI const char * U_EXPORT2
+ucal_getType(const UCalendar *cal, UErrorCode* status);
+
+/**
+ * Given a key and a locale, returns an array of string values in a preferred
+ * order that would make a difference. These are all and only those values where
+ * the open (creation) of the service with the locale formed from the input locale
+ * plus input keyword and that value has different behavior than creation with the
+ * input locale alone.
+ * @param key one of the keys supported by this service. For now, only
+ * "calendar" is supported.
+ * @param locale the locale
+ * @param commonlyUsed if set to true it will return only commonly used values
+ * with the given locale in preferred order. Otherwise,
+ * it will return all the available values for the locale.
+ * @param status error status
+ * @return a string enumeration over keyword values for the given key and the locale.
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucal_getKeywordValuesForLocale(const char* key,
+ const char* locale,
+ UBool commonlyUsed,
+ UErrorCode* status);
+
+
+/** Weekday types, as returned by ucal_getDayOfWeekType().
+ * @stable ICU 4.4
+ */
+enum UCalendarWeekdayType {
+ /**
+ * Designates a full weekday (no part of the day is included in the weekend).
+ * @stable ICU 4.4
+ */
+ UCAL_WEEKDAY,
+ /**
+ * Designates a full weekend day (the entire day is included in the weekend).
+ * @stable ICU 4.4
+ */
+ UCAL_WEEKEND,
+ /**
+ * Designates a day that starts as a weekday and transitions to the weekend.
+ * Call ucal_getWeekendTransition() to get the time of transition (milliseconds after midnight).
+ * @stable ICU 4.4
+ */
+ UCAL_WEEKEND_ONSET,
+ /**
+ * Designates a day that starts as the weekend and transitions to a weekday.
+ * Call ucal_getWeekendTransition() to get the time of transition (milliseconds after midnight).
+ * @stable ICU 4.4
+ */
+ UCAL_WEEKEND_CEASE
+};
+
+/** @stable ICU 4.4 */
+typedef enum UCalendarWeekdayType UCalendarWeekdayType;
+
+/**
+ * Returns whether the given day of the week is a weekday, a weekend day,
+ * or a day that transitions from one to the other, for the locale and
+ * calendar system associated with this UCalendar (the locale's region is
+ * often the most determinant factor). If a transition occurs at midnight,
+ * then the days before and after the transition will have the
+ * type UCAL_WEEKDAY or UCAL_WEEKEND. If a transition occurs at a time
+ * other than midnight, then the day of the transition will have
+ * the type UCAL_WEEKEND_ONSET or UCAL_WEEKEND_CEASE. In this case, the
+ * function ucal_getWeekendTransition() will return the point of
+ * transition.
+ * @param cal The UCalendar to query.
+ * @param dayOfWeek The day of the week whose type is desired (UCAL_SUNDAY..UCAL_SATURDAY).
+ * @param status The error code for the operation.
+ * @return The UCalendarWeekdayType for the day of the week.
+ * @stable ICU 4.4
+ */
+U_CAPI UCalendarWeekdayType U_EXPORT2
+ucal_getDayOfWeekType(const UCalendar *cal, UCalendarDaysOfWeek dayOfWeek, UErrorCode* status);
+
+/**
+ * Returns the time during the day at which the weekend begins or ends in
+ * this calendar system. If ucal_getDayOfWeekType() returns UCAL_WEEKEND_ONSET
+ * for the specified dayOfWeek, return the time at which the weekend begins.
+ * If ucal_getDayOfWeekType() returns UCAL_WEEKEND_CEASE for the specified dayOfWeek,
+ * return the time at which the weekend ends. If ucal_getDayOfWeekType() returns
+ * some other UCalendarWeekdayType for the specified dayOfWeek, it is an error condition
+ * (U_ILLEGAL_ARGUMENT_ERROR).
+ * @param cal The UCalendar to query.
+ * @param dayOfWeek The day of the week for which the weekend transition time is
+ * desired (UCAL_SUNDAY..UCAL_SATURDAY).
+ * @param status The error code for the operation.
+ * @return The milliseconds after midnight at which the weekend begins or ends.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getWeekendTransition(const UCalendar *cal, UCalendarDaysOfWeek dayOfWeek, UErrorCode *status);
+
+/**
+ * Returns true if the given UDate is in the weekend in
+ * this calendar system.
+ * @param cal The UCalendar to query.
+ * @param date The UDate in question.
+ * @param status The error code for the operation.
+ * @return true if the given UDate is in the weekend in
+ * this calendar system, false otherwise.
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+ucal_isWeekend(const UCalendar *cal, UDate date, UErrorCode *status);
+
+/**
+ * Return the difference between the target time and the time this calendar object is currently set to.
+ * If the target time is after the current calendar setting, then the returned value will be positive.
+ * The field parameter specifies the units of the return value. For example, if field is UCAL_MONTH
+ * and ucal_getFieldDifference returns 3, then the target time is 3 to less than 4 months after the
+ * current calendar setting.
+ *
+ * As a side effect of this call, this calendar is advanced toward target by the given amount. That is,
+ * calling this function has the side effect of calling ucal_add on this calendar with the specified
+ * field and an amount equal to the return value from this function.
+ *
+ * A typical way of using this function is to call it first with the largest field of interest, then
+ * with progressively smaller fields.
+ *
+ * @param cal The UCalendar to compare and update.
+ * @param target The target date to compare to the current calendar setting.
+ * @param field The field to compare; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
+ * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK,
+ * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND,
+ * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET.
+ * @param status A pointer to an UErrorCode to receive any errors
+ * @return The date difference for the specified field.
+ * @stable ICU 4.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucal_getFieldDifference(UCalendar* cal,
+ UDate target,
+ UCalendarDateFields field,
+ UErrorCode* status);
+
+/**
+ * Time zone transition types for ucal_getTimeZoneTransitionDate
+ * @stable ICU 50
+ */
+enum UTimeZoneTransitionType {
+ /**
+ * Get the next transition after the calendar's current date,
+ * i.e. excludes the current date
+ * @stable ICU 50
+ */
+ UCAL_TZ_TRANSITION_NEXT,
+ /**
+ * Get the next transition on or after the calendar's current date,
+ * i.e. may include the current date
+ * @stable ICU 50
+ */
+ UCAL_TZ_TRANSITION_NEXT_INCLUSIVE,
+ /**
+ * Get the previous transition before the calendar's current date,
+ * i.e. excludes the current date
+ * @stable ICU 50
+ */
+ UCAL_TZ_TRANSITION_PREVIOUS,
+ /**
+ * Get the previous transition on or before the calendar's current date,
+ * i.e. may include the current date
+ * @stable ICU 50
+ */
+ UCAL_TZ_TRANSITION_PREVIOUS_INCLUSIVE
+};
+
+typedef enum UTimeZoneTransitionType UTimeZoneTransitionType; /**< @stable ICU 50 */
+
+/**
+* Get the UDate for the next/previous time zone transition relative to
+* the calendar's current date, in the time zone to which the calendar
+* is currently set. If there is no known time zone transition of the
+* requested type relative to the calendar's date, the function returns
+* false.
+* @param cal The UCalendar to query.
+* @param type The type of transition desired.
+* @param transition A pointer to a UDate to be set to the transition time.
+* If the function returns false, the value set is unspecified.
+* @param status A pointer to a UErrorCode to receive any errors.
+* @return true if a valid transition time is set in *transition, false
+* otherwise.
+* @stable ICU 50
+*/
+U_CAPI UBool U_EXPORT2
+ucal_getTimeZoneTransitionDate(const UCalendar* cal, UTimeZoneTransitionType type,
+ UDate* transition, UErrorCode* status);
+
+/**
+* Converts a system time zone ID to an equivalent Windows time zone ID. For example,
+* Windows time zone ID "Pacific Standard Time" is returned for input "America/Los_Angeles".
+*
+* @param id A system time zone ID.
+* @param len The length of id, or -1 if null-terminated.
+* @param winid A buffer to receive a Windows time zone ID.
+* @param winidCapacity The capacity of the result buffer winid.
+* @param status Receives the status.
+* @return The result string length, not including the terminating null.
+* @see ucal_getTimeZoneIDForWindowsID
+*
+* @stable ICU 52
+*/
+U_CAPI int32_t U_EXPORT2
+ucal_getWindowsTimeZoneID(const UChar* id, int32_t len,
+ UChar* winid, int32_t winidCapacity, UErrorCode* status);
+
+/**
+* Converts a Windows time zone ID to an equivalent system time zone ID
+* for a region. For example, system time zone ID "America/Los_Angeles" is returned
+* for input Windows ID "Pacific Standard Time" and region "US" (or null),
+* "America/Vancouver" is returned for the same Windows ID "Pacific Standard Time" and
+* region "CA".
+*
+* @param winid A Windows time zone ID.
+* @param len The length of winid, or -1 if null-terminated.
+* @param region A null-terminated region code, or NULL if no regional preference.
+* @param id A buffer to receive a system time zone ID.
+* @param idCapacity The capacity of the result buffer id.
+* @param status Receives the status.
+* @return The result string length, not including the terminating null.
+* @see ucal_getWindowsTimeZoneID
+*
+* @stable ICU 52
+*/
+U_CAPI int32_t U_EXPORT2
+ucal_getTimeZoneIDForWindowsID(const UChar* winid, int32_t len, const char* region,
+ UChar* id, int32_t idCapacity, UErrorCode* status);
+
+/**
+ * Options used by ucal_getTimeZoneOffsetFromLocal and BasicTimeZone::getOffsetFromLocal()
+ * to specify how to interpret an input time when it does not exist, or when it is ambiguous,
+ * around a time zone transition. STANDARD and DAYLIGHT variants are FORMER or LATTER OR'ed with 0x01 or 0x03.
+ * @stable ICU 69
+ */
+enum UTimeZoneLocalOption {
+ /**
+ * An input time is always interpreted as local time before
+ * a time zone transition.
+ * @stable ICU 69
+ */
+ UCAL_TZ_LOCAL_FORMER = 0x04,
+ /**
+ * An input time is always interpreted as local time after
+ * a time zone transition.
+ * @stable ICU 69
+ */
+ UCAL_TZ_LOCAL_LATTER = 0x0C,
+ /**
+ * An input time is interpreted as standard time when local
+ * time is switched to/from daylight saving time. When both
+ * sides of a time zone transition are standard time,
+ * or daylight saving time, the local time before the
+ * transition is used.
+ * @stable ICU 69
+ */
+ UCAL_TZ_LOCAL_STANDARD_FORMER = UCAL_TZ_LOCAL_FORMER | 0x01,
+ /**
+ * An input time is interpreted as standard time when local
+ * time is switched to/from daylight saving time. When both
+ * sides of a time zone transition are standard time,
+ * or daylight saving time, the local time after the
+ * transition is used.
+ * @stable ICU 69
+ */
+ UCAL_TZ_LOCAL_STANDARD_LATTER = UCAL_TZ_LOCAL_LATTER | 0x01,
+ /**
+ * An input time is interpreted as daylight saving time when
+ * local time is switched to/from standard time. When both
+ * sides of a time zone transition are standard time,
+ * or daylight saving time, the local time before the
+ * transition is used.
+ * @stable ICU 69
+ */
+ UCAL_TZ_LOCAL_DAYLIGHT_FORMER = UCAL_TZ_LOCAL_FORMER | 0x03,
+ /**
+ * An input time is interpreted as daylight saving time when
+ * local time is switched to/from standard time. When both
+ * sides of a time zone transition are standard time,
+ * or daylight saving time, the local time after the
+ * transition is used.
+ * @stable ICU 69
+ */
+ UCAL_TZ_LOCAL_DAYLIGHT_LATTER = UCAL_TZ_LOCAL_LATTER | 0x03,
+};
+typedef enum UTimeZoneLocalOption UTimeZoneLocalOption; /**< @stable ICU 69 */
+
+/**
+* Returns the time zone raw offset and DST offset for the given moment
+* in time. Upon return, local-millis = GMT-millis + rawOffset +
+* dstOffset. All computations are performed in the proleptic
+* Gregorian calendar.
+*
+* @param cal The UCalendar which specifies the local date and time value to query.
+* @param nonExistingTimeOpt The option indicating how to interpret the date and
+* time in the calendar when it represents a local time that is skipped at a positive
+* time zone transition (e.g. when daylight saving time starts or the time zone
+* offset is increased due to a time zone rule change).
+* @param duplicatedTimeOpt The option indicating how to interpret the date and
+* time in the calendar when it represents a local time that repeats at a
+* negative time zone transition (e.g. when daylight saving time ends or the
+* time zone offset is decreased due to a time zone rule change).
+* @param rawOffset output parameter to receive the raw offset, that
+* is, the offset not including DST adjustments.
+* If the status is set to one of the error codes, the value set is unspecified.
+* @param dstOffset output parameter to receive the DST offset,
+* that is, the offset to be added to `rawOffset' to obtain the
+* total offset between local and GMT time. If DST is not in
+* effect, this value is zero; otherwise it is a positive value,
+* typically one hour.
+* If the status is set to one of the error codes, the value set is unspecified.
+* @param status A pointer to a UErrorCode to receive any errors.
+* @stable ICU 69
+*/
+U_CAPI void U_EXPORT2
+ucal_getTimeZoneOffsetFromLocal(
+ const UCalendar* cal,
+ UTimeZoneLocalOption nonExistingTimeOpt,
+ UTimeZoneLocalOption duplicatedTimeOpt,
+ int32_t* rawOffset, int32_t* dstOffset, UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/uchar.h b/third_party/icu4c/ndk_headers/unicode/uchar.h
new file mode 100644
index 00000000000..a6605f52585
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/uchar.h
@@ -0,0 +1,3931 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 03/29/99 helena Updated for C APIs.
+* 4/15/99 Madhu Updated for C Implementation and Javadoc
+* 5/20/99 Madhu Added the function u_getVersion()
+* 8/19/1999 srl Upgraded scripts to Unicode 3.0
+* 8/27/1999 schererm UCharDirection constants: U_...
+* 11/11/1999 weiv added u_isalnum(), cleaned comments
+* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+
+#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
+
+#define USET_DEFINED
+
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * It is forward-declared here to avoid including unicode/uset.h file if related
+ * APIs are not used.
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+
+#endif
+
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "15.0"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (https://unicode-org.github.io/icu/userguide/strings/properties).
+ *
+ * Many properties are accessible via generic functions that take a UProperty selector.
+ * - u_hasBinaryProperty() returns a binary value (true/false) per property and code point.
+ * - u_getIntPropertyValue() returns an integer value per property and code point.
+ * For each supported enumerated or catalog property, there is
+ * an enum type for all of the property's values, and
+ * u_getIntPropertyValue() returns the numeric values of those constants.
+ * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
+ * all code points for which the property is true.
+ * - u_getIntPropertyMap() returns a map for each
+ * ICU-supported enumerated/catalog/int-valued property which
+ * maps all Unicode code points to their values for that property.
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ *
+ * API access for C/POSIX character classes is as follows:
+ * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
+ * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
+ * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
+ * - punct: u_ispunct(c)
+ * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
+ * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
+ * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
+ * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
+ * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
+ * - cntrl: u_charType(c)==U_CONTROL_CHAR
+ * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
+ * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
+ *
+ * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
+ * the Standard Recommendations in UTS #18. Instead, they match Java
+ * functions according to their API documentation.
+ *
+ * \htmlonly
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * \endhtmlonly
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ * most of general categories "Z" (separators) + most whitespace ISO controls
+ * (including no-break spaces, but excluding IS1..IS4)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Get a single-bit bit set (a flag) from a bit number 0..31.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ *
+ * For details about the properties see
+ * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+ /*
+ * Note: UProperty constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UCHAR_0<=code<=0x10ffff
.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ * The name will always be zero-terminated.
+ * If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength ==sizeof(buffer)
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for U_SUCCESS() after u_charName()
+ * returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ * If the bufferLength is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+ char *buffer, int32_t bufferLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, pErrorCode
+ * is set to U_INVALID_CHAR_FOUND.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ * or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_CAPI UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+ const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns false, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return true if the enumeration should continue, false to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
+ UChar32 code,
+ UCharNameChoice nameChoice,
+ const char *name,
+ int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ * (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_CAPI void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn,
+ void *context,
+ UCharNameChoice nameChoice,
+ UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask". These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ * If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All properties have a long name. Most
+ * have a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt. Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK. These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched. The name is compared
+ * using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ * does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_CAPI UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
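+ * Must be UCHAR_BINARY_START<=which
+ * [NOTE(review): the rest of this comment, and the declarations that followed
+ * it, were lost when this patch text was extracted; restore them from the
+ * upstream ICU uchar.h.]
+ */
+
+/**
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range 2<=radix<=36 or if the
+ * value of ch is not a valid digit in the specified
+ * radix, -1 is returned. A character is a valid digit
+ * if at least one of the following is true:
+ * - The character has a decimal digit value.
+ *   In this case the value is the character's decimal digit value.
+ * - The character is one of the uppercase Latin letters 'A' through 'Z'.
+ *   In this case the value is ch-'A'+10.
+ * - The character is one of the lowercase Latin letters 'a' through 'z'.
+ *   In this case the value is ch-'a'+10.
+ *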
+ * Same as java.lang.Character.digit().
+ *
+ * @param ch the code point to be tested.
+ * @param radix the radix.
+ * @return the numeric value represented by the character in the
+ * specified radix,
+ * or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @see u_forDigit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of radix is not a
+ * valid radix, or the value of digit is not a valid
+ * digit in the specified radix, the null character
+ * (U+0000) is returned.
+ *
+ * The radix argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The digit argument is valid if
+ * 0 <= digit < radix.
+ *
+ * If the digit is less than 10, then
+ * '0' + digit is returned. Otherwise, the value
+ * 'a' + digit - 10 is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param digit the number to convert to a character.
+ * @param radix the radix.
+ * @return the char
representation of the specified digit
+ * in the specified radix.
+ *
+ * @see u_digit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ * the Unicode version number
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ * It must be 0<=c<=0x10ffff.
+ * @param dest Destination address for copying the string.
+ * The string will be zero-terminated if possible.
+ * If there is no FC_NFKC_Closure string,
+ * then the buffer will be set to the empty string.
+ * @param destCapacity The capacity of dest, in UChars (not bytes).
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated string and the returned length indicates the full
+ * length of the string.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+#endif
+
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/
diff --git a/third_party/icu4c/ndk_headers/unicode/ucol.h b/third_party/icu4c/ndk_headers/unicode/ucol.h
new file mode 100644
index 00000000000..080d0c91f98
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/ucol.h
@@ -0,0 +1,1234 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (c) 1996-2015, International Business Machines Corporation and others.
+* All Rights Reserved.
+*******************************************************************************
+*/
+
+#ifndef UCOL_H
+#define UCOL_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/unorm.h"
+#include "unicode/parseerr.h"
+#include "unicode/uloc.h"
+#include "unicode/uset.h"
+#include "unicode/uscript.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Collator
+ *
+ * Collator C API
+ *
+ * The C API for Collator performs locale-sensitive
+ * string comparison. You use this service to build
+ * searching and sorting routines for natural language text.
+ *
+ * Resulting bounds can be used to produce a range of strings that are
+ * between upper and lower bounds. For example, if bounds are produced
+ * for a sortkey of string "smith", strings between upper and lower
+ * bounds with one level would include "Smith", "SMITH", "sMiTh".
+ * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
+ * is produced, strings matched would be as above. However, if bound
+ * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
+ * also match "Smithsonian" and similar.
+ * For more on usage, see example in cintltst/capitst.c in procedure
+ * TestBounds.
+ * Sort keys may be compared using strcmp.
+ * @param source The source sortkey.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * (If an unmodified sortkey is passed, it is always null
+ * terminated).
+ * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
+ * produces a lower inclusive bound, UCOL_BOUND_UPPER, that
+ * produces upper bound that matches strings of the same length
+ * or UCOL_BOUND_UPPER_LONG that matches strings that have the
+ * same starting substring as the source string.
+ * @param noOfLevels Number of levels required in the resulting bound (for most
+ * uses, the recommended value is 1). See users guide for
+ * explanation on number of levels a sortkey can have.
+ * @param result A pointer to a buffer to receive the resulting sortkey.
+ * @param resultLength The maximum size of result.
+ * @param status Used for returning error code if something went wrong. If the
+ * number of levels requested is higher than the number of levels
+ * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
+ * issued.
+ * @return The size needed to fully store the bound.
+ * @see ucol_keyHashCode
+ * @stable ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getBound(const uint8_t *source,
+ int32_t sourceLength,
+ UColBoundMode boundType,
+ uint32_t noOfLevels,
+ uint8_t *result,
+ int32_t resultLength,
+ UErrorCode *status);
+
+/**
+ * Gets the version information for a Collator. Version is currently
+ * an opaque 32-bit number which depends, among other things, on major
+ * versions of the collator tailoring and UCA.
+ * @param coll The UCollator to query.
+ * @param info the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucol_getVersion(const UCollator* coll, UVersionInfo info);
+
+/**
+ * Gets the UCA version information for a Collator. Version is the
+ * UCA version number (3.1.1, 4.0).
+ * @param coll The UCollator to query.
+ * @param info the version # information, the result will be filled in
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
+
+/**
+ * Merges two sort keys. The levels are merged with their corresponding counterparts
+ * (primaries with primaries, secondaries with secondaries etc.). Between the values
+ * from the same level a separator is inserted.
+ *
+ * This is useful, for example, for combining sort keys from first and last names
+ * to sort such pairs.
+ * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
+ *
+ * The recommended way to achieve "merged" sorting is by
+ * concatenating strings with U+FFFE between them.
+ * The concatenation has the same sort order as the merged sort keys,
+ * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2).
+ * Using strings with U+FFFE may yield shorter sort keys.
+ *
+ * For details about Sort Key Features see
+ * https://unicode-org.github.io/icu/userguide/collation/api#sort-key-features
+ *
+ * It is possible to merge multiple sort keys by consecutively merging
+ * another one with the intermediate result.
+ *
+ * The length of the merge result is the sum of the lengths of the input sort keys.
+ *
+ * Example (uncompressed):
+ * 191B1D 01 050505 01 910505 00
+ * 1F2123 01 050505 01 910505 00
+ * will be merged as
+ * 191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00
+ *
+ * If the destination buffer is not big enough, then its contents are undefined.
+ * If any of the source lengths are zero or any of the source pointers are NULL/undefined,
+ * the result is of size zero.
+ *
+ * @param src1 the first sort key
+ * @param src1Length the length of the first sort key, including the zero byte at the end;
+ * can be -1 if the function is to find the length
+ * @param src2 the second sort key
+ * @param src2Length the length of the second sort key, including the zero byte at the end;
+ * can be -1 if the function is to find the length
+ * @param dest the buffer where the merged sort key is written,
+ * can be NULL if destCapacity==0
+ * @param destCapacity the number of bytes in the dest buffer
+ * @return the length of the merged sort key, src1Length+src2Length;
+ * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
+ * in which cases the contents of dest are undefined
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
+ const uint8_t *src2, int32_t src2Length,
+ uint8_t *dest, int32_t destCapacity);
+
+/**
+ * Universal attribute setter
+ * @param coll collator whose attributes are to be changed
+ * @param attr attribute type
+ * @param value attribute value
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_getAttribute
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
+
+/**
+ * Universal attribute getter
+ * @param coll collator whose attributes are to be queried
+ * @param attr attribute type
+ * @return attribute value
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @see UColAttribute
+ * @see UColAttributeValue
+ * @see ucol_setAttribute
+ * @stable ICU 2.0
+ */
+U_CAPI UColAttributeValue U_EXPORT2
+ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
+
+/**
+ * Sets the variable top to the top of the specified reordering group.
+ * The variable top determines the highest-sorting character
+ * which is affected by UCOL_ALTERNATE_HANDLING.
+ * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
+ * @param coll the collator
+ * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
+ * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
+ * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @see ucol_getMaxVariable
+ * @stable ICU 53
+ */
+U_CAPI void U_EXPORT2
+ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode);
+
+/**
+ * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
+ * @param coll the collator
+ * @return the maximum variable reordering group.
+ * @see ucol_setMaxVariable
+ * @stable ICU 53
+ */
+U_CAPI UColReorderCode U_EXPORT2
+ucol_getMaxVariable(const UCollator *coll);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Gets the variable top value of a Collator.
+ * @param coll collator whose variable top is to be retrieved
+ * @param status error code (not changed by function). If error code is set,
+ * the return value is undefined.
+ * @return the variable top primary weight
+ * @see ucol_getMaxVariable
+ * @see ucol_setVariableTop
+ * @see ucol_restoreVariableTop
+ * @stable ICU 2.0
+ */
+U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Thread safe cloning operation. The result is a clone of a given collator.
+ * @param coll collator to be cloned
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @return pointer to the new clone
+ * @see ucol_open
+ * @see ucol_openRules
+ * @see ucol_close
+ * @stable ICU 71
+ */
+U_CAPI UCollator* U_EXPORT2 ucol_clone(const UCollator *coll, UErrorCode *status);
+
+/**
+ * Returns current rules. Delta defines whether full rules are returned or just the tailoring.
+ * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
+ * to store rules, will store up to available space.
+ *
+ * ucol_getRules() should normally be used instead.
+ * See https://unicode-org.github.io/icu/userguide/collation/customization#building-on-existing-locales
+ * @param coll collator to get the rules from
+ * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
+ * @param buffer buffer to store the result in. If NULL, you'll get no rules.
+ * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
+ * @return the number of UChars needed to store the rules
+ * @stable ICU 2.0
+ * @see UCOL_FULL_RULES
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Gets the locale name of the collator. If the collator
+ * is instantiated from the rules, then this function returns
+ * NULL.
+ * @param coll The UCollator for which the locale is needed
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status error code of the operation
+ * @return real locale name from which the collation data comes.
+ * If the collator was instantiated from rules, returns
+ * NULL.
+ * @stable ICU 2.8
+ */
+U_CAPI const char * U_EXPORT2
+ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
+
+/**
+ * Get a Unicode set that contains all the characters and sequences tailored in
+ * this collator. The result must be disposed of by using uset_close.
+ * @param coll The UCollator for which we want to get tailored chars
+ * @param status error code of the operation
+ * @return a pointer to a newly created USet. Must be disposed of by using uset_close.
+ * @see ucol_openRules
+ * @see uset_close
+ * @stable ICU 2.4
+ */
+U_CAPI USet * U_EXPORT2
+ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
+
+#ifndef U_HIDE_INTERNAL_API
+
+#endif /* U_HIDE_INTERNAL_API */
+
+/** Creates a binary image of a collator. This binary image can be stored and
+ * later used to instantiate a collator using ucol_openBinary.
+ * This API supports preflighting.
+ * @param coll Collator
+ * @param buffer a fill-in buffer to receive the binary image
+ * @param capacity capacity of the destination buffer
+ * @param status for catching errors
+ * @return size of the image
+ * @see ucol_openBinary
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_cloneBinary(const UCollator *coll,
+ uint8_t *buffer, int32_t capacity,
+ UErrorCode *status);
+
+/** Opens a collator from a collator binary image created using
+ * ucol_cloneBinary. Binary image used in instantiation of the
+ * collator remains owned by the user and should stay around for
+ * the lifetime of the collator. The API also takes a base collator
+ * which must be the root collator.
+ * @param bin binary image owned by the user and required through the
+ * lifetime of the collator
+ * @param length size of the image. If negative, the API will try to
+ * figure out the length of the image
+ * @param base Base collator, for lookup of untailored characters.
+ * Must be the root collator, must not be NULL.
+ * The base is required to be present through the lifetime of the collator.
+ * @param status for catching errors
+ * @return newly created collator
+ * @see ucol_cloneBinary
+ * @stable ICU 3.2
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_openBinary(const uint8_t *bin, int32_t length,
+ const UCollator *base,
+ UErrorCode *status);
+
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/udat.h b/third_party/icu4c/ndk_headers/unicode/udat.h
new file mode 100644
index 00000000000..ba0ee5ba21d
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/udat.h
@@ -0,0 +1,1588 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *******************************************************************************
+*/
+
+#ifndef UDAT_H
+#define UDAT_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/ucal.h"
+#include "unicode/unum.h"
+#include "unicode/udisplaycontext.h"
+#include "unicode/ufieldpositer.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: DateFormat
+ *
+ * Date Format C API
+ *
+ * Date Format C API consists of functions that convert dates and
+ * times from their internal representations to textual form and back again in a
+ * language-independent manner. Converting from the internal representation (milliseconds
+ * since midnight, January 1, 1970) to text is known as "formatting," and converting
+ * from text to millis is known as "parsing." We currently define only one concrete
+ * structure UDateFormat, which can handle pretty much all normal
+ * date formatting and parsing actions.
+ *
+ * \code
+ * UErrorCode status = U_ZERO_ERROR;
+ * UChar *myString;
+ * int32_t myStrlen = 0;
+ * UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, -1, &status);
+ * myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, NULL, &status);
+ * if (status==U_BUFFER_OVERFLOW_ERROR){
+ * status=U_ZERO_ERROR;
+ * myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
+ * udat_format(dfmt, myDate, myString, myStrlen+1, NULL, &status);
+ * }
+ * \endcode
+ *
+ * If you are formatting multiple dates, it is more efficient to get the
+ * format and use it multiple times so that the system doesn't have to fetch the
+ * information about the local language and country conventions multiple times.
+ *
+ * \code
+ * UErrorCode status = U_ZERO_ERROR;
+ * int32_t i, myStrlen = 0;
+ * UChar* myString;
+ * char buffer[1024];
+ * UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values
+ * UDateFormat* df = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status);
+ * for (i = 0; i < 3; i++) {
+ * myStrlen = udat_format(df, myDateArr[i], NULL, myStrlen, NULL, &status);
+ * if(status == U_BUFFER_OVERFLOW_ERROR){
+ * status = U_ZERO_ERROR;
+ * myString = (UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
+ * udat_format(df, myDateArr[i], myString, myStrlen+1, NULL, &status);
+ * printf("%s\n", u_austrcpy(buffer, myString) );
+ * free(myString);
+ * }
+ * }
+ * \endcode
+ *
+ * To get specific fields of a date, you can use UFieldPosition to
+ * get specific fields.
+ *
+ * \code
+ * UErrorCode status = U_ZERO_ERROR;
+ * UFieldPosition pos;
+ * UChar *myString;
+ * int32_t myStrlen = 0;
+ * char buffer[1024];
+ *
+ * pos.field = 1; // Same as the DateFormat::EField enum
+ * UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status);
+ * myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, &pos, &status);
+ * if (status==U_BUFFER_OVERFLOW_ERROR){
+ * status=U_ZERO_ERROR;
+ * myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) );
+ * udat_format(dfmt, myDate, myString, myStrlen+1, &pos, &status);
+ * }
+ * printf("date format: %s\n", u_austrcpy(buffer, myString));
+ * buffer[pos.endIndex] = 0; // NUL-terminate the string.
+ * printf("UFieldPosition position equals %s\n", &buffer[pos.beginIndex]);
+ * \endcode
+ *
+ * To format a date for a different Locale, specify it in the call to
+ * udat_open()
+ *
+ * \code
+ * UDateFormat* df = udat_open(UDAT_SHORT, UDAT_SHORT, "fr_FR", NULL, -1, NULL, 0, &status);
+ * \endcode
+ *
+ * You can use a DateFormat API udat_parse() to parse.
+ *
+ * \code
+ * UErrorCode status = U_ZERO_ERROR;
+ * int32_t parsepos=0;
+ * UDate myDate = udat_parse(df, myString, u_strlen(myString), &parsepos, &status);
+ * \endcode
+ *
+ * You can pass in different options for the arguments for date and time style
+ * to control the length of the result; from SHORT to MEDIUM to LONG to FULL.
+ * The exact result depends on the locale, but generally:
+ * see UDateFormatStyle for more details
+ *
+ *
+ * You can also set the time zone on the format if you wish.
+ *
+ *
+ * UDateFormat supports
+ * the date and time formatting algorithm and pattern letters defined by
+ * UTS#35
+ * Unicode Locale Data Markup Language (LDML) and further documented for ICU in the
+ * ICU
+ * User Guide.
+ *
+ * ULoc C API for Locale
+ *
+ * A Locale represents a specific geographical, political,
+ * or cultural region. An operation that requires a Locale to perform
+ * its task is called locale-sensitive and uses the Locale
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture. In the C APIs, a locale is simply a const char string.
+ *
+ * You create a Locale with one of the three options listed below.
+ * Each of the components is separated by '_' in the locale string.
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ * The first option is a valid ISO
+ * Language Code. These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at a number of sites, such as:
+ *
+ * \code
+ * newLanguage
+ *
+ * newLanguage + newCountry
+ *
+ * newLanguage + newCountry + newVariant
+ * \endcode
+ *
+ * \htmlonly
+ * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt
+ *
+ *
+ * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html
+ *
+ * Because a Locale is just an identifier for a region,
+ * no validity check is performed when you specify a Locale.
+ * If you want to see whether particular resources are available for the
+ * Locale you asked for, you must query those resources. For
+ * example, ask the UNumberFormat for the locales it supports
+ * using its getAvailable method.
+ *
+ * Note: When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * UResourceBundle.
+ *
+ * Locale provides a number of convenient constants
+ * that you can use to specify the commonly used
+ * locales. For example, the following refers to a locale
+ * for the United States:
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ *
+ *
+ * \code
+ * ULOC_US
+ * \endcode
+ *
+ * Use uloc_getCountry to get the ISO Country Code and
+ * uloc_getLanguage to get the ISO Language Code. You can
+ * use uloc_getDisplayCountry to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use uloc_getDisplayLanguage to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the uloc_getDisplayXXX methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * The unum_xxx functions format
+ * numbers, currency, or percentages in a locale-sensitive manner.
+ *
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly
+ * \code
+ * UErrorCode success = U_ZERO_ERROR;
+ * UNumberFormat *nf;
+ * const char* myLocale = "fr_FR";
+ *
+ * nf = unum_open( UNUM_DEFAULT, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, NULL, success );
+ * unum_close(nf);
+ * \endcode
+ *
+ * \htmlonly\endhtmlonly
+ *
\endhtmlonly
+ * A
+ * \code
+ *
+ * nf = unum_open( UNUM_DEFAULT, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, myLocale, success );
+ * unum_close(nf);
+ * \endcode
+ *
+ * Locale is the mechanism for identifying the kind of services
+ * (UNumberFormat) that you would like to get. The locale is
+ * just a mechanism for identifying these services.
+ *
+ * \endhtmlonly
+ *
\endhtmlonly
+ *
+ * \code
+ * const char* uloc_getAvailable(int32_t index);
+ * int32_t uloc_countAvailable();
+ * int32_t
+ * uloc_getDisplayName(const char* localeID,
+ * const char* inLocaleID,
+ * UChar* result,
+ * int32_t maxResultSize,
+ * UErrorCode* err);
+ *
+ * \endcode
+ *
+ * \htmlonly
+ * UErrorCode status = U_ZERO_ERROR;
+ * const char* keyword =NULL;
+ * int32_t keywordLen = 0;
+ * int32_t keywordCount = 0;
+ * UChar displayKeyword[256];
+ * int32_t displayKeywordLen = 0;
+ * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ * if(U_FAILURE(status)){
+ * ...something went wrong so handle the error...
+ * break;
+ * }
+ * // the uenum_next returns NUL terminated string
+ * keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
+ * ... do something interesting .....
+ * }
+ * uenum_close(keywordEnum);
+ *
+ * @param keyword The keyword whose display string needs to be returned.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and should not indicate failure on entry.
+ * U_USING_DEFAULT_WARNING indicates that no data was found from the locale
+ * resources and the keyword is placed into dest as fallback.
+ * @return the actual buffer size needed for the displayable keyword.
+ * @see #uloc_openKeywords
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale The locale whose keyword value is to be displayed. NULL may be used to specify the default.
+ * @param keyword The keyword whose value should be used.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword value should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and must not indicate failure on entry.
+ * U_USING_DEFAULT_WARNING indicates that no data was found from the locale
+ * resources and the value of the keyword is placed into dest as fallback.
+ * @return the actual buffer size needed for the displayable keyword value.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeywordValue( const char* locale,
+ const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the full name suitable for display for the specified locale.
+ *
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
+ * @param inLocaleID Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param result the displayable name for localeID
+ * @param maxResultSize the size of the name buffer to store the
+ * displayable full name with
+ * @param err error information if retrieving the displayable name failed
+ * @return the actual buffer size needed for the displayable name. If it's greater
+ * than maxResultSize, the returned displayable name will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayName(const char* localeID,
+ const char* inLocaleID,
+ UChar* result,
+ int32_t maxResultSize,
+ UErrorCode* err);
+
+
+/**
+ * Gets the specified locale from a list of available locales.
+ *
+ * This method corresponds to uloc_openAvailableByType called with the
+ * ULOC_AVAILABLE_DEFAULT type argument.
+ *
+ * The return value is a pointer to an item of a locale name array. Both this
+ * array and the pointers it contains are owned by ICU and should not be
+ * deleted or written through by the caller. The locale name is a
+ * NUL-terminated string.
+ *
+ * @param n the specific locale name index of the available locale list;
+ * should not exceed the number returned by uloc_countAvailable.
+ * @return a specified locale name of all available locales
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getAvailable(int32_t n);
+
+/**
+ * Gets the size of the all available locale list.
+ *
+ * @return the size of the locale list
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void);
+
+/**
+ * Types for uloc_openAvailableByType.
+ *
+ * @stable ICU 65
+ */
+typedef enum ULocAvailableType {
+ /**
+ * Locales that return data when passed to ICU APIs,
+ * but not including legacy or alias locales.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_DEFAULT,
+
+ /**
+ * Legacy or alias locales that return data when passed to ICU APIs.
+ * Examples of supported legacy or alias locales:
+ *
+ * - iw (alias to he)
+ * - mo (alias to ro)
+ * - zh_CN (alias to zh_Hans_CN)
+ * - sr_BA (alias to sr_Cyrl_BA)
+ * - ars (alias to ar_SA)
+ *
+ * The locales in this set are disjoint from the ones in
+ * ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use
+ * ULOC_AVAILABLE_WITH_LEGACY_ALIASES.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_ONLY_LEGACY_ALIASES,
+
+ /**
+ * The union of the locales in ULOC_AVAILABLE_DEFAULT and
+ * ULOC_AVAILABLE_ONLY_LEGACY_ALIASES.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_WITH_LEGACY_ALIASES,
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ ULOC_AVAILABLE_COUNT
+#endif /* U_HIDE_INTERNAL_API */
+} ULocAvailableType;
+
+/**
+ * Gets a list of available locales according to the type argument, allowing
+ * the user to access different sets of supported locales in ICU.
+ *
+ * The returned UEnumeration must be closed by the caller.
+ *
+ * @param type Type choice from ULocAvailableType.
+ * @param status Set if an error occurred.
+ * @return a UEnumeration owned by the caller, or NULL on failure.
+ * @stable ICU 65
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status);
+
+/**
+ *
+ * Gets a list of all available 2-letter language codes defined in ISO 639,
+ * plus additional 3-letter codes determined to be useful for locale generation as
+ * defined by Unicode CLDR. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOLanguages(void);
+
+/**
+ *
+ * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOCountries(void);
+
+/**
+ * Truncate the locale ID string to get the parent locale ID.
+ * Copies the part of the string before the last underscore.
+ * The parent locale ID will be an empty string if there is no
+ * underscore, or if there is only one underscore at localeID[0].
+ *
+ * @param localeID Input locale ID string.
+ * @param parent Output string buffer for the parent locale ID.
+ * @param parentCapacity Size of the output buffer.
+ * @param err A UErrorCode value.
+ * @return The length of the parent locale ID.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getParent(const char* localeID,
+ char* parent,
+ int32_t parentCapacity,
+ UErrorCode* err);
+
+
+
+
+/**
+ * Gets the full name for the specified locale, like uloc_getName(),
+ * but without keywords.
+ *
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * This API strips off the keyword part, so "de_DE\@collation=phonebook"
+ * will become "de_DE".
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getBaseName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale to get the keywords for
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @stable ICU 2.8
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ *
+ * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value; must not be
+ * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code: e.g. buffer not big enough or ill-formed localeID
+ * or keywordName parameters.
+ * @return the length of keyword value
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Sets or removes the value of the specified keyword.
+ *
+ * For removing all keywords, use uloc_getBaseName().
+ *
+ * NOTE: Unlike almost every other ICU function which takes a
+ * buffer, this function will NOT truncate the output text, and will
+ * not update the buffer with unterminated text setting a status of
+ * U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received,
+ * it means a terminated version of the updated locale ID would not fit
+ * in the buffer, and the original buffer is untouched. This is done to
+ * prevent incorrect or possibly even malformed locales from being generated
+ * and used.
+ *
+ * @param keywordName name of the keyword to be set; must not be
+ * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed; no error is given if
+ * that keyword does not exist. Otherwise, must consist only of
+ * [A-Za-z0-9] and [/_+-].
+ * @param buffer input buffer containing well-formed locale ID to be
+ * modified.
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code: e.g. buffer not big enough
+ * or ill-formed keywordName or keywordValue parameters, or ill-formed
+ * locale ID in buffer on input.
+ * @return the length needed for the buffer
+ * @see uloc_getKeywordValue
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_setKeywordValue(const char* keywordName,
+ const char* keywordValue,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+/**
+ * Returns whether the locale's script is written right-to-left.
+ * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags().
+ * If no likely script is known, then false is returned.
+ *
+ * A script is right-to-left according to the CLDR script metadata
+ * which corresponds to whether the script's letters have Bidi_Class=R or AL.
+ *
+ * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".
+ *
+ * @param locale input locale ID
+ * @return true if the locale's script is written right-to-left
+ * @stable ICU 54
+ */
+U_CAPI UBool U_EXPORT2
+uloc_isRightToLeft(const char *locale);
+
+/**
+ * enums for the return value for the character and line orientation
+ * functions.
+ * @stable ICU 4.0
+ */
+typedef enum {
+ ULOC_LAYOUT_LTR = 0, /* left-to-right. */
+ ULOC_LAYOUT_RTL = 1, /* right-to-left. */
+ ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */
+ ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */
+ ULOC_LAYOUT_UNKNOWN
+} ULayoutType;
+
+/**
+ * Get the layout character orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ * @stable ICU 4.0
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+ UErrorCode *status);
+
+/**
+ * Get the layout line orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ * @stable ICU 4.0
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+ UErrorCode *status);
+
+/**
+ * Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.
+ *
+ * @see uloc_acceptLanguageFromHTTP
+ * @see uloc_acceptLanguage
+ * @stable ICU 3.2
+ */
+typedef enum {
+ /**
+ * No exact match was found.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_FAILED = 0,
+ /**
+ * An exact match was found.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_VALID = 1,
+ /**
+ * A fallback was found. For example, the Accept-Language list includes 'ja_JP'
+ * and is matched with available locale 'ja'.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_FALLBACK = 2 /* */
+} UAcceptResult;
+
+/**
+ * Based on a HTTP header from a web browser and a list of available locales,
+ * determine an acceptable locale for the user.
+ *
+ * This is a thin wrapper over C++ class LocaleMatcher.
+ *
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP.
+ * @param availableLocales - list of available locales to match
+ * @param status ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult,
+ const char *httpAcceptLanguage,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+/**
+ * Based on a list of available locales,
+ * determine an acceptable locale for the user.
+ *
+ * This is a thin wrapper over C++ class LocaleMatcher.
+ *
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param acceptList - list of acceptable languages
+ * @param acceptListCount - count of acceptList items
+ * @param availableLocales - list of available locales to match
+ * @param status ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult, const char **acceptList,
+ int32_t acceptListCount,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+
+/**
+ * Gets the ICU locale ID for the specified Win32 LCID value.
+ *
+ * @param hostID the Win32 LCID to translate
+ * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated
+ * if there is room.
+ * @param localeCapacity the size of the output buffer
+ * @param status an error is returned if the LCID is unrecognized or the output buffer
+ * is too small
+ * @return the actual size of the locale ID, not including NUL-termination
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
+ UErrorCode *status);
+
+
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output buffer. For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_DE"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param maximizedLocaleID The maximized locale
+ * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
+ * @param err Error information if maximizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the maximized locale. If it's
+ * greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_addLikelySubtags(const char* localeID,
+ char* maximizedLocaleID,
+ int32_t maximizedLocaleIDCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output buffer. Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for uloc_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_DE" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param minimizedLocaleID The minimized locale
+ * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
+ * @param err Error information if minimizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the minimized locale. If it's
+ * greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_minimizeSubtags(const char* localeID,
+ char* minimizedLocaleID,
+ int32_t minimizedLocaleIDCapacity,
+ UErrorCode* err);
+
+/**
+ * Returns a well-formed BCP47 language tag for the specified locale ID.
+ *
+ * Note: When strict is false, any locale
+ * fields which do not satisfy the BCP47 syntax requirement will
+ * be omitted from the result. When strict is
+ * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the
+ * err if any locale fields do not satisfy the
+ * BCP47 syntax requirement.
+ * @param localeID the input locale ID
+ * @param langtag the output buffer receiving BCP47 language
+ * tag for the locale ID.
+ * @param langtagCapacity the size of the BCP47 language tag
+ * output buffer.
+ * @param strict boolean value indicating if the function returns
+ * an error for an ill-formed input locale ID.
+ * @param err error information if receiving the language
+ * tag failed.
+ * @return The length of the BCP47 language tag.
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_toLanguageTag(const char* localeID,
+ char* langtag,
+ int32_t langtagCapacity,
+ UBool strict,
+ UErrorCode* err);
+
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ * uloc_toUnicodeLocaleKey("ZZ") returns "ZZ".
+ *
+ * @param keyword the input locale keyword (either legacy key
+ * such as "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @return the well-formed BCP 47 Unicode locale extension key,
+ * or NULL if the specified locale keyword cannot be
+ * mapped to a well-formed BCP 47 Unicode locale extension
+ * key.
+ * @see uloc_toLegacyKey
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ * uloc_toUnicodeLocaleType("Foo", "Bar") returns "Bar",
+ * uloc_toUnicodeLocaleType("variableTop", "00A4") returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy key such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either legacy type
+ * such as "phonebook" or BCP 47 Unicode locale extension
+ * type such as "phonebk").
+ * @return the well-formed BCP47 Unicode locale extension type,
+ * or NULL if the locale keyword value cannot be mapped to
+ * a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ *
+ * @param keyword the input locale keyword (either BCP 47 Unicode locale
+ * extension key or legacy key).
+ * @return the well-formed legacy key, or NULL if the specified
+ * keyword cannot be mapped to a well-formed legacy key.
+ * @see uloc_toUnicodeLocaleKey
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ * uloc_toLegacyType("Foo", "Bar") returns "Bar",
+ * uloc_toLegacyType("vt", "00A4") returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy keyword such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either BCP 47 Unicode locale
+ * extension type such as "phonebk" or legacy keyword value
+ * such as "phonebook").
+ * @return the well-formed legacy type, or NULL if the specified
+ * keyword value cannot be mapped to a well-formed legacy
+ * type.
+ * @see uloc_toUnicodeLocaleType
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif /*_ULOC*/
diff --git a/third_party/icu4c/ndk_headers/unicode/umisc.h b/third_party/icu4c/ndk_headers/unicode/umisc.h
new file mode 100644
index 00000000000..06f62b0be14
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/umisc.h
@@ -0,0 +1,62 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: umisc.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct15
+* created by: Markus W. Scherer
+*/
+
+#ifndef UMISC_H
+#define UMISC_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Miscellaneous definitions
+ *
+ * This file contains miscellaneous definitions for the C APIs.
+ */
+
+U_CDECL_BEGIN
+
+/** A struct representing a range of text containing a specific field
+ * @stable ICU 2.0
+ */
+typedef struct UFieldPosition {
+ /**
+ * The field
+ * @stable ICU 2.0
+ */
+ int32_t field;
+ /**
+ * The start of the text range containing field
+ * @stable ICU 2.0
+ */
+ int32_t beginIndex;
+ /**
+ * The limit of the text range containing field
+ * @stable ICU 2.0
+ */
+ int32_t endIndex;
+} UFieldPosition;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * Opaque type returned by registerInstance, registerFactory and unregister for service registration.
+ * @stable ICU 2.6
+ */
+typedef const void* URegistryKey;
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/unorm.h b/third_party/icu4c/ndk_headers/unicode/unorm.h
new file mode 100644
index 00000000000..d3f5a12faa9
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/unorm.h
@@ -0,0 +1 @@
+
diff --git a/third_party/icu4c/ndk_headers/unicode/unorm2.h b/third_party/icu4c/ndk_headers/unicode/unorm2.h
new file mode 100644
index 00000000000..42c672ca5a0
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/unorm2.h
@@ -0,0 +1,606 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unorm2.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009dec15
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UNORM2_H__
+#define __UNORM2_H__
+
+/**
+ * \file
+ * \brief C API: New API for Unicode Normalization.
+ *
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of UNormalizer2 are unmodifiable/immutable.
+ * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
+ * For more details see the Normalizer2 C++ class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+#include "unicode/uset.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * Constants for normalization modes.
+ * For details about standard Unicode normalization forms
+ * and about the algorithms which are also used with custom mapping tables
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+typedef enum {
+ /**
+ * Decomposition followed by composition.
+ * Same as standard NFC when using an "nfc" instance.
+ * Same as standard NFKC when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE,
+ /**
+ * Map, and reorder canonically.
+ * Same as standard NFD when using an "nfc" instance.
+ * Same as standard NFKD when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_DECOMPOSE,
+ /**
+ * "Fast C or D" form.
+ * If a string is in this form, then further decomposition without reordering
+ * would yield the same form as DECOMPOSE.
+ * Text in "Fast C or D" form can be processed efficiently with data tables
+ * that are "canonically closed", that is, that provide equivalent data for
+ * equivalent text, without having to be fully normalized.
+ * Not a standard Unicode normalization form.
+ * Not a unique form: Different FCD strings can be canonically equivalent.
+ * For details see http://www.unicode.org/notes/tn5/#FCD
+ * @stable ICU 4.4
+ */
+ UNORM2_FCD,
+ /**
+ * Compose only contiguously.
+ * Also known as "FCC" or "Fast C Contiguous".
+ * The result will often but not always be in NFC.
+ * The result will conform to FCD which is useful for processing.
+ * Not a standard Unicode normalization form.
+ * For details see http://www.unicode.org/notes/tn5/#FCC
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE_CONTIGUOUS
+} UNormalization2Mode;
+
+/**
+ * Result values for normalization quick check functions.
+ * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+ /**
+ * The input string is not in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_NO,
+ /**
+ * The input string is in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_YES,
+ /**
+ * The input string may or may not be in the normalization form.
+ * This value is only returned for composition forms like NFC and FCC,
+ * when a backward-combining character is found for which the surrounding text
+ * would have to be analyzed further.
+ * @stable ICU 2.0
+ */
+ UNORM_MAYBE
+} UNormalizationCheckResult;
+
+/**
+ * Opaque C service object type for the new normalization API.
+ * @stable ICU 4.4
+ */
+struct UNormalizer2;
+typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFC normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFD normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKD normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance which uses the specified data file
+ * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+ * and which composes or decomposes text according to the specified mode.
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ *
+ * Use packageName=NULL for data files that are part of ICU's own data.
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+ * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+ * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+ *
+ * @param packageName NULL for ICU built-in data, otherwise application data package name
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param mode normalization mode (compose or decompose etc.)
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Constructs a filtered normalizer wrapping any UNormalizer2 instance
+ * and a filter set.
+ * Both are aliased and must not be modified or deleted while this object
+ * is used.
+ * The filter set should be frozen; otherwise the performance will suffer greatly.
+ * @param norm2 wrapped UNormalizer2 instance
+ * @param filterSet USet which determines the characters to be normalized
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_CAPI UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
+
+/**
+ * Closes a UNormalizer2 instance from unorm2_openFiltered().
+ * Do not close instances from unorm2_getInstance()!
+ * @param norm2 UNormalizer2 instance to be closed
+ * @stable ICU 4.4
+ */
+U_CAPI void U_EXPORT2
+unorm2_close(UNormalizer2 *norm2);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUNormalizer2Pointer
+ * "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the length of the destination string.
+ * The source and destination strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param src source string
+ * @param length length of the source string, or -1 if NUL-terminated
+ * @param dest destination string; its contents are replaced with normalized src
+ * @param capacity number of UChars that can be written to dest
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the length of the destination string
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_normalize(const UNormalizer2 *norm2,
+ const UChar *src, int32_t length,
+ UChar *dest, int32_t capacity,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, will be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the length of the first string
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, should be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the length of the first string
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_append(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the decomposition mapping of c.
+ * Roughly equivalent to normalizing the String form of c
+ * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
+ * returns a negative value and does not write a string
+ * if c does not have a decomposition mapping in this instance's data.
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_getDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the raw decomposition mapping of c.
+ *
+ * This is similar to the unorm2_getDecomposition() function but returns the
+ * raw decomposition mapping as specified in UnicodeData.txt or
+ * (for custom data) in the mapping files processed by the gennorm2 tool.
+ * By contrast, unorm2_getDecomposition() returns the processed,
+ * recursively-decomposed version of this mapping.
+ *
+ * When used on a standard NFKC Normalizer2 instance,
+ * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
+ *
+ * When used on a standard NFC Normalizer2 instance,
+ * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
+ * in this case, the result contains either one or two code points (=1..4 UChars).
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_getRawDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ *
+ * Returns a composite code point c only if c has a two-way mapping to a+b.
+ * In standard Unicode normalization, this means that
+ * c has a canonical decomposition to a+b
+ * and c does not have the Full_Composition_Exclusion property.
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+U_CAPI UChar32 U_EXPORT2
+unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
+
+/**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+U_CAPI uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the string is normalized.
+ * Internally, in cases where unorm2_quickCheck() would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this function
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_isNormalized(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the string is normalized.
+ * For the two COMPOSE modes, the result could be "maybe" in cases that
+ * would take a little more work to resolve definitively.
+ * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend()
+ * for a faster combination of quick check + normalization, to avoid
+ * re-checking the "yes" prefix.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm2_quickCheck(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the end of the normalized substring of the input string.
+ * In other words, with end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);
+ * the substring of s from index 0 up to (not including) end
+ * will pass the quick check with a "yes" result.
+ *
+ * The returned end index is usually one or more characters before the
+ * "no" or "maybe" character: The end index is at a normalization boundary.
+ * (See the class documentation for more about normalization boundaries.)
+ *
+ * When the goal is a normalized string and most input strings are expected
+ * to be normalized already, then call this function,
+ * and if it returns a prefix shorter than the input string,
+ * copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character is normalization-inert.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c is normalization-inert
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Compares two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* !UCONFIG_NO_NORMALIZATION */
+#endif /* __UNORM2_H__ */
diff --git a/third_party/icu4c/ndk_headers/unicode/unum.h b/third_party/icu4c/ndk_headers/unicode/unum.h
new file mode 100644
index 00000000000..d4f030b7e97
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/unum.h
@@ -0,0 +1,1441 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1997-2015, International Business Machines Corporation and others.
+* All Rights Reserved.
+* Modification History:
+*
+* Date Name Description
+* 06/24/99 helena Integrated Alan's NF enhancements and Java2 bug fixes
+*******************************************************************************
+*/
+
+#ifndef _UNUM
+#define _UNUM
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/uloc.h"
+#include "unicode/umisc.h"
+#include "unicode/parseerr.h"
+#include "unicode/udisplaycontext.h"
+#include "unicode/ufieldpositer.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Compatibility APIs for number formatting.
+ *
+ * Number Format C API
+ *
+ *
+ * \code
+ * UChar myString[20];
+ * double myNumber = 7.0;
+ * UErrorCode status = U_ZERO_ERROR;
+ * UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+ * unum_formatDouble(nf, myNumber, myString, 20, NULL, &status);
+ * printf(" Example 1: %s\n", austrdup(myString) ); //austrdup( a function used to convert UChar* to char*)
+ * \endcode
+ *
+ * If you are formatting multiple numbers, it is more efficient to get
+ * the format and use it multiple times so that the system doesn't
+ * have to fetch the information about the local language and country
+ * conventions multiple times.
+ *
+ * \code
+ * uint32_t i, resultlength, reslenneeded;
+ * UErrorCode status = U_ZERO_ERROR;
+ * UFieldPosition pos;
+ * uint32_t a[] = { 123, 3333, -1234567 };
+ * const uint32_t a_len = sizeof(a) / sizeof(a[0]);
+ * UNumberFormat* nf;
+ * UChar* result = NULL;
+ *
+ * nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status);
+ * for (i = 0; i < a_len; i++) {
+ * resultlength=0;
+ * reslenneeded=unum_format(nf, a[i], NULL, resultlength, &pos, &status);
+ * result = NULL;
+ * if(status==U_BUFFER_OVERFLOW_ERROR){
+ * status=U_ZERO_ERROR;
+ * resultlength=reslenneeded+1;
+ * result=(UChar*)malloc(sizeof(UChar) * resultlength);
+ * unum_format(nf, a[i], result, resultlength, &pos, &status);
+ * }
+ * printf( " Example 2: %s\n", austrdup(result));
+ * free(result);
+ * }
+ * \endcode
+ *
+ * To format a number for a different Locale, specify it in the
+ * call to unum_open().
+ *
+ * \code
+ * UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, "fr_FR", NULL, &status);
+ * \endcode
+ *
+ * You can use a NumberFormat API unum_parse() to parse.
+ *
+ * \code
+ * UErrorCode status = U_ZERO_ERROR;
+ * int32_t pos=0;
+ * int32_t num;
+ * num = unum_parse(nf, str, u_strlen(str), &pos, &status);
+ * \endcode
+ *
+ * Use UNUM_DECIMAL to get the normal number format for that country.
+ * There are other static options available. Use UNUM_CURRENCY
+ * to get the currency number format for that country. Use UNUM_PERCENT
+ * to get a format for displaying percentages. With this format, a
+ * fraction from 0.53 is displayed as 53%.
+ *
+ *
+ * format()
is padded. */
+ UNUM_FORMAT_WIDTH,
+ /** The position at which padding will take place. */
+ UNUM_PADDING_POSITION,
+ /** Secondary grouping size */
+ UNUM_SECONDARY_GROUPING_SIZE,
+ /** Use significant digits
+ * @stable ICU 3.0 */
+ UNUM_SIGNIFICANT_DIGITS_USED,
+ /** Minimum significant digits
+ * @stable ICU 3.0 */
+ UNUM_MIN_SIGNIFICANT_DIGITS,
+ /** Maximum significant digits
+ * @stable ICU 3.0 */
+ UNUM_MAX_SIGNIFICANT_DIGITS,
+ /** Lenient parse mode used by rule-based formats.
+ * @stable ICU 3.0
+ */
+ UNUM_LENIENT_PARSE,
+#if UCONFIG_HAVE_PARSEALLINPUT
+ /** Consume all input. (may use fastpath). Set to UNUM_YES (require fastpath), UNUM_NO (skip fastpath), or UNUM_MAYBE (heuristic).
+ * This is an internal ICU API. Do not use.
+ * @internal
+ */
+ UNUM_PARSE_ALL_INPUT = 20,
+#endif
+ /**
+ * Scale, which adjusts the position of the
+ * decimal point when formatting. Amounts will be multiplied by 10 ^ (scale)
+ * before they are formatted. The default value for the scale is 0 ( no adjustment ).
+ *
+ * length>=size
+* @see unum_setSymbol
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+unum_getSymbol(const UNumberFormat *fmt,
+ UNumberFormatSymbol symbol,
+ UChar *buffer,
+ int32_t size,
+ UErrorCode *status);
+
+/**
+* Set a symbol associated with a UNumberFormat.
+* A UNumberFormat uses symbols to represent the special locale-dependent
+* characters in a number, for example the percent sign. This API is not
+* supported for rule-based formatters.
+* @param fmt The formatter to set.
+* @param symbol The UNumberFormatSymbol constant for the symbol to set
+* @param value The string to set the symbol to
+* @param length The length of the string, or -1 for a zero-terminated string
+* @param status A pointer to a UErrorCode to receive any errors.
+* @see unum_getSymbol
+* @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+unum_setSymbol(UNumberFormat *fmt,
+ UNumberFormatSymbol symbol,
+ const UChar *value,
+ int32_t length,
+ UErrorCode *status);
+
+
+/**
+ * Get the locale for this number format object.
+ * You can choose between valid and actual locale.
+ * @param fmt The formatter to get the locale from
+ * @param type type of the locale we're looking for (valid or actual)
+ * @param status error code for the operation
+ * @return the locale name
+ * @stable ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+unum_getLocaleByType(const UNumberFormat *fmt,
+ ULocDataLocaleType type,
+ UErrorCode* status);
+
+/**
+ * Set a particular UDisplayContext value in the formatter, such as
+ * UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
+ * @param fmt The formatter for which to set a UDisplayContext value.
+ * @param value The UDisplayContext value to set.
+ * @param status A pointer to a UErrorCode to receive any errors
+ * @stable ICU 53
+ */
+U_CAPI void U_EXPORT2
+unum_setContext(UNumberFormat* fmt, UDisplayContext value, UErrorCode* status);
+
+/**
+ * Get the formatter's UDisplayContext value for the specified UDisplayContextType,
+ * such as UDISPCTX_TYPE_CAPITALIZATION.
+ * @param fmt The formatter to query.
+ * @param type The UDisplayContextType whose value to return
+ * @param status A pointer to a UErrorCode to receive any errors
+ * @return The UDisplayContext value for the specified type.
+ * @stable ICU 53
+ */
+U_CAPI UDisplayContext U_EXPORT2
+unum_getContext(const UNumberFormat *fmt, UDisplayContextType type, UErrorCode* status);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/unumberformatter.h b/third_party/icu4c/ndk_headers/unicode/unumberformatter.h
new file mode 100644
index 00000000000..de8aa0d88fb
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/unumberformatter.h
@@ -0,0 +1,707 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef __UNUMBERFORMATTER_H__
+#define __UNUMBERFORMATTER_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/parseerr.h"
+#include "unicode/ufieldpositer.h"
+#include "unicode/umisc.h"
+
+
+/**
+ * \file
+ * \brief C API: Localized number formatting; not recommended for C++.
+ *
+ * This is the C-compatible version of the NumberFormatter API introduced in ICU 60. C++ users should
+ * include unicode/numberformatter.h and use the proper C++ APIs.
+ *
+ * The C API accepts a number skeleton string for specifying the settings for formatting, which covers a
+ * very large subset of all possible number formatting features. For more information on number skeleton
+ * strings, see unicode/numberformatter.h.
+ *
+ * When using UNumberFormatter, which is treated as immutable, the results are exported to a mutable
+ * UFormattedNumber object, which you subsequently use for populating your string buffer or iterating over
+ * the fields.
+ *
+ * Example code:
+ *
+ * // Setup:
+ * UErrorCode ec = U_ZERO_ERROR;
+ * UNumberFormatter* uformatter = unumf_openForSkeletonAndLocale(u"precision-integer", -1, "en", &ec);
+ * UFormattedNumber* uresult = unumf_openResult(&ec);
+ * if (U_FAILURE(ec)) { return; }
+ *
+ * // Format a double:
+ * unumf_formatDouble(uformatter, 5142.3, uresult, &ec);
+ * if (U_FAILURE(ec)) { return; }
+ *
+ * // Export the string to a malloc'd buffer:
+ * int32_t len = unumf_resultToString(uresult, NULL, 0, &ec);
+ * // at this point, ec == U_BUFFER_OVERFLOW_ERROR
+ * ec = U_ZERO_ERROR;
+ * UChar* buffer = (UChar*) malloc((len+1)*sizeof(UChar));
+ * unumf_resultToString(uresult, buffer, len+1, &ec);
+ * if (U_FAILURE(ec)) { return; }
+ * // buffer should equal "5,142"
+ *
+ * // Cleanup:
+ * unumf_close(uformatter);
+ * unumf_closeResult(uresult);
+ * free(buffer);
+ *
+ *
+ * If you are a C++ user linking against the C libraries, you can use the LocalPointer versions of these
+ * APIs. The following example uses LocalPointer with the decimal number and field position APIs:
+ *
+ *
+ * // Setup:
+ * LocalUNumberFormatterPointer uformatter(unumf_openForSkeletonAndLocale(u"percent", -1, "en", &ec));
+ * LocalUFormattedNumberPointer uresult(unumf_openResult(&ec));
+ * if (U_FAILURE(ec)) { return; }
+ *
+ * // Format a decimal number:
+ * unumf_formatDecimal(uformatter.getAlias(), "9.87E-3", -1, uresult.getAlias(), &ec);
+ * if (U_FAILURE(ec)) { return; }
+ *
+ * // Get the location of the percent sign:
+ * UFieldPosition ufpos = {UNUM_PERCENT_FIELD, 0, 0};
+ * unumf_resultNextFieldPosition(uresult.getAlias(), &ufpos, &ec);
+ * // ufpos should contain beginIndex=7 and endIndex=8 since the string is "0.00987%"
+ *
+ * // No need to do any cleanup since we are using LocalPointer.
+ *
+ */
+
+/**
+ * An enum declaring how to resolve conflicts between maximum fraction digits and maximum
+ * significant digits.
+ *
+ * There are two modes, RELAXED and STRICT:
+ *
+ * - RELAXED: Relax one of the two constraints (fraction digits or significant digits) in order
+ * to round the number to a higher level of precision.
+ * - STRICT: Enforce both constraints, resulting in the number being rounded to a lower
+ * level of precision.
+ *
+ * The default settings for compact notation rounding are Max-Fraction = 0 (round to the nearest
+ * integer), Max-Significant = 2 (round to 2 significant digits), and priority RELAXED (choose
+ * the constraint that results in more digits being displayed).
+ *
+ * Conflicting *minimum* fraction and significant digits are always resolved in the direction that
+ * results in more trailing zeros.
+ *
+ * Example 1: Consider the number 3.141, with various different settings:
+ *
+ * - Max-Fraction = 1: "3.1"
+ * - Max-Significant = 3: "3.14"
+ *
+ * The rounding priority determines how to resolve the conflict when both Max-Fraction and
+ * Max-Significant are set. With RELAXED, the less-strict setting (the one that causes more digits
+ * to be displayed) will be used; Max-Significant wins. With STRICT, the more-strict setting (the
+ * one that causes fewer digits to be displayed) will be used; Max-Fraction wins.
+ *
+ * Example 2: Consider the number 8317, with various different settings:
+ *
+ * - Max-Fraction = 1: "8317"
+ * - Max-Significant = 3: "8320"
+ *
+ * Here, RELAXED favors Max-Fraction and STRICT favors Max-Significant. Note that this larger
+ * number caused the two modes to favor the opposite result.
+ *
+ * @stable ICU 69
+ */
+typedef enum UNumberRoundingPriority {
+    /**
+     * When the fraction-digit and significant-digit limits disagree, relax one
+     * of them so that the result keeps the greater precision.
+     *
+     * @stable ICU 69
+     */
+    UNUM_ROUNDING_PRIORITY_RELAXED = 0,
+
+    /**
+     * Honor every rounding limit at once, accepting the lower-precision result.
+     *
+     * @stable ICU 69
+     */
+    UNUM_ROUNDING_PRIORITY_STRICT = 1
+} UNumberRoundingPriority;
+
+/**
+ * An enum declaring how to render the decimal separator.
+ *
+ * - UNUM_DECIMAL_SEPARATOR_AUTO: "1", "1.1"
+ * - UNUM_DECIMAL_SEPARATOR_ALWAYS: "1.", "1.1"
+ *
+ * @stable ICU 60
+ */
+typedef enum UNumberDecimalSeparatorDisplay {
+    /**
+     * Default. The decimal separator is printed only when at least one digit
+     * follows it; otherwise it is left out.
+     *
+     * @stable ICU 60
+     */
+    UNUM_DECIMAL_SEPARATOR_AUTO = 0,
+
+    /**
+     * The decimal separator is printed unconditionally, including when no digits follow it.
+     *
+     * @stable ICU 60
+     */
+    UNUM_DECIMAL_SEPARATOR_ALWAYS = 1,
+
+    // Keep the enumerator below outside any #ifndef U_HIDE_INTERNAL_API guard:
+    // struct MacroProps is not guarded and needs it.
+    /**
+     * Sentinel: one more than the highest UNumberDecimalSeparatorDisplay value.
+     *
+     * @internal ICU 60: The numeric value may change over time; see ICU ticket #12420.
+     */
+    UNUM_DECIMAL_SEPARATOR_COUNT
+} UNumberDecimalSeparatorDisplay;
+
+/**
+ * An enum declaring how to render trailing zeros.
+ *
+ * - UNUM_TRAILING_ZERO_AUTO: 0.90, 1.00, 1.10
+ * - UNUM_TRAILING_ZERO_HIDE_IF_WHOLE: 0.90, 1, 1.10
+ *
+ * @stable ICU 69
+ */
+typedef enum UNumberTrailingZeroDisplay {
+    /**
+     * Trailing zeros follow the minimum fraction and significant digit settings.
+     *
+     * @stable ICU 69
+     */
+    UNUM_TRAILING_ZERO_AUTO = 0,
+
+    /**
+     * Behaves like AUTO, except that the zeros after the decimal separator are
+     * hidden when every digit after the separator is zero.
+     *
+     * @stable ICU 69
+     */
+    UNUM_TRAILING_ZERO_HIDE_IF_WHOLE = 1
+} UNumberTrailingZeroDisplay;
+
+struct UNumberFormatter;
+/**
+ * C-compatible version of icu::number::LocalizedNumberFormatter.
+ *
+ * NOTE: This is a C-compatible API; C++ users should build against numberformatter.h instead.
+ *
+ * @stable ICU 62
+ */
+typedef struct UNumberFormatter UNumberFormatter;
+
+struct UFormattedNumber;
+/**
+ * C-compatible version of icu::number::FormattedNumber.
+ *
+ * NOTE: This is a C-compatible API; C++ users should build against numberformatter.h instead.
+ *
+ * @stable ICU 62
+ */
+typedef struct UFormattedNumber UFormattedNumber;
+
+
+/**
+ * Creates a new UNumberFormatter for the given skeleton string and locale. This function and
+ * unumf_openForSkeletonAndLocaleWithError are currently the only ways to create a UNumberFormatter.
+ *
+ * Objects of type UNumberFormatter returned by this method are threadsafe.
+ *
+ * For more details on skeleton strings, see the documentation in numberformatter.h. For more details on
+ * the usage of this API, see the documentation at the top of unumberformatter.h.
+ *
+ * For more information on number skeleton strings, see:
+ * https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html
+ *
+ * NOTE: This is a C-compatible API; C++ users should build against numberformatter.h instead.
+ *
+ * @param skeleton The skeleton string, like u"percent precision-integer"
+ * @param skeletonLen The number of UChars in the skeleton string, or -1 if it is NUL-terminated.
+ * @param locale The NUL-terminated locale ID.
+ * @param ec Set if an error occurs.
+ * @stable ICU 62
+ */
+U_CAPI UNumberFormatter* U_EXPORT2
+unumf_openForSkeletonAndLocale(const UChar* skeleton, int32_t skeletonLen, const char* locale,
+ UErrorCode* ec);
+
+
+/**
+ * Like unumf_openForSkeletonAndLocale, but accepts a UParseError, which will be populated with the
+ * location of a skeleton syntax error if such a syntax error exists.
+ *
+ * For more information on number skeleton strings, see:
+ * https://unicode-org.github.io/icu/userguide/format_parse/numbers/skeletons.html
+ *
+ * @param skeleton The skeleton string, like u"percent precision-integer"
+ * @param skeletonLen The number of UChars in the skeleton string, or -1 if it is NUL-terminated.
+ * @param locale The NUL-terminated locale ID.
+ * @param perror A parse error struct populated if an error occurs when parsing. Can be NULL.
+ * If no error occurs, perror->offset will be set to -1.
+ * @param ec Set if an error occurs.
+ * @stable ICU 64
+ */
+U_CAPI UNumberFormatter* U_EXPORT2
+unumf_openForSkeletonAndLocaleWithError(
+ const UChar* skeleton, int32_t skeletonLen, const char* locale, UParseError* perror, UErrorCode* ec);
+
+
+
+/**
+ * Uses a UNumberFormatter to format an integer to a UFormattedNumber. A string, field position, and other
+ * information can be retrieved from the UFormattedNumber.
+ *
+ * The UNumberFormatter can be shared between threads. Each thread should have its own local
+ * UFormattedNumber, however, for storing the result of the formatting operation.
+ *
+ * NOTE: This is a C-compatible API; C++ users should build against numberformatter.h instead.
+ *
+ * @param uformatter A formatter object created by unumf_openForSkeletonAndLocale or similar.
+ * @param value The number to be formatted.
+ * @param uresult The object that will be mutated to store the result; see unumf_openResult.
+ * @param ec Set if an error occurs.
+ * @stable ICU 62
+ */
+U_CAPI void U_EXPORT2
+unumf_formatInt(const UNumberFormatter* uformatter, int64_t value, UFormattedNumber* uresult,
+ UErrorCode* ec);
+
+
+/**
+ * Uses a UNumberFormatter to format a double to a UFormattedNumber. A string, field position, and other
+ * information can be retrieved from the UFormattedNumber.
+ *
+ * The UNumberFormatter can be shared between threads. Each thread should have its own local
+ * UFormattedNumber, however, for storing the result of the formatting operation.
+ *
+ * NOTE: This is a C-compatible API; C++ users should build against numberformatter.h instead.
+ *
+ * @param uformatter A formatter object created by unumf_openForSkeletonAndLocale or similar.
+ * @param value The number to be formatted.
+ * @param uresult The object that will be mutated to store the result; see unumf_openResult.
+ * @param ec Set if an error occurs.
+ * @stable ICU 62
+ */
+U_CAPI void U_EXPORT2
+unumf_formatDouble(const UNumberFormatter* uformatter, double value, UFormattedNumber* uresult,
+ UErrorCode* ec);
+
+
+/**
+ * Uses a UNumberFormatter to format a decimal number to a UFormattedNumber. A string, field position, and
+ * other information can be retrieved from the UFormattedNumber.
+ *
+ * The UNumberFormatter can be shared between threads. Each thread should have its own local
+ * UFormattedNumber, however, for storing the result of the formatting operation.
+ *
+ * The syntax of the unformatted number is a "numeric string" as defined in the Decimal Arithmetic
+ * Specification, available at http://speleotrove.com/decimal
+ *
+ * NOTE: This is a C-compatible API; C++ users should build against numberformatter.h instead.
+ *
+ * @param uformatter A formatter object created by unumf_openForSkeletonAndLocale or similar.
+ * @param value The numeric string to be formatted.
+ * @param valueLen The length of the numeric string, or -1 if it is NUL-terminated.
+ * @param uresult The object that will be mutated to store the result; see unumf_openResult.
+ * @param ec Set if an error occurs.
+ * @stable ICU 62
+ */
+U_CAPI void U_EXPORT2
+unumf_formatDecimal(const UNumberFormatter* uformatter, const char* value, int32_t valueLen,
+ UFormattedNumber* uresult, UErrorCode* ec);
+
+
+
+
+
+
+/**
+ * Releases the UNumberFormatter created by unumf_openForSkeletonAndLocale().
+ *
+ * @param uformatter An object created by unumf_openForSkeletonAndLocale().
+ * @stable ICU 62
+ */
+U_CAPI void U_EXPORT2
+unumf_close(UNumberFormatter* uformatter);
+
+
+
+#if U_SHOW_CPLUSPLUS_API
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUNumberFormatterPointer
+ * "Smart pointer" class; closes a UNumberFormatter via unumf_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * Usage:
+ *
+ * LocalUNumberFormatterPointer uformatter(unumf_openForSkeletonAndLocale(...));
+ * // no need to explicitly call unumf_close()
+ *
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 62
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUNumberFormatterPointer, UNumberFormatter, unumf_close);
+
+/**
+ * \class LocalUFormattedNumberPointer
+ * "Smart pointer" class; closes a UFormattedNumber via unumf_closeResult().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * Usage:
+ *
+ * LocalUFormattedNumberPointer uresult(unumf_openResult(...));
+ * // no need to explicitly call unumf_closeResult()
+ *
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 62
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUFormattedNumberPointer, UFormattedNumber, unumf_closeResult);
+
+U_NAMESPACE_END
+#endif // U_SHOW_CPLUSPLUS_API
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+#endif //__UNUMBERFORMATTER_H__
diff --git a/third_party/icu4c/ndk_headers/unicode/uscript.h b/third_party/icu4c/ndk_headers/unicode/uscript.h
new file mode 100644
index 00000000000..4b986901111
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/uscript.h
@@ -0,0 +1,710 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (C) 1997-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ *
+ * File USCRIPT.H
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 07/06/2001 Ram Creation.
+ ******************************************************************************
+ */
+
+#ifndef USCRIPT_H
+#define USCRIPT_H
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Script Information
+ */
+
+/**
+ * Constants for ISO 15924 script codes.
+ *
+ * The current set of script code constants supports at least all scripts
+ * that are encoded in the version of Unicode which ICU currently supports.
+ * The names of the constants are usually derived from the
+ * Unicode script property value aliases.
+ * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
+ * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
+ *
+ * In addition, constants for many ISO 15924 script codes
+ * are included, for use with language tags, CLDR data, and similar.
+ * Some of those codes are not used in the Unicode Character Database (UCD).
+ * For example, there are no characters that have a UCD script property value of
+ * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
+ *
+ * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
+ *
+ * Starting with ICU 55, script codes are only added when their scripts
+ * have been or will certainly be encoded in Unicode,
+ * and have been assigned Unicode script property value aliases,
+ * to ensure that their script names are stable and match the names of the constants.
+ * Script codes like Latf and Aran that are not subject to separate encoding
+ * may be added at any time.
+ *
+ * @stable ICU 2.2
+ */
+typedef enum UScriptCode {
+ /*
+ * Note: UScriptCode constants and their ISO script code comments
+ * are parsed by preparseucd.py.
+ * It matches lines like
+ * USCRIPT_(OR of each set element)*
.
+ * (Java/ICU/Perl regex stops at the first match of an OR.)
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_CONTAINED = 1,
+ /**
+ * Continues a span() while there is a set element at the current position.
+ * Increments by the longest matching element at each position.
+ * (For characters only, this is like while contains(current)==true).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of set elements (characters or strings) that are in the set.
+ *
+ * If a set only contains single characters, then this is the same
+ * as USET_SPAN_CONTAINED.
+ *
+ * If a set contains strings, then the span will be the longest substring
+ * with a match at each position with the longest single set element (character or string).
+ *
+ * Use this span condition together with other longest-match algorithms,
+ * such as ICU converters (ucnv_getUnicodeSet()).
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_SIMPLE = 2,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last span condition.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ USET_SPAN_CONDITION_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} USetSpanCondition;
+
+enum {
+    /**
+     * Number of elements in USerializedSet::staticArray.
+     * Large enough to hold any set consisting of a single code point;
+     * the value also pads sizeof(USerializedSet) to a convenient size.
+     * @stable ICU 2.4
+     */
+    USET_SERIALIZED_STATIC_ARRAY_CAPACITY = 8
+};
+
+/**
+ * A serialized form of a Unicode set. Limited manipulations are
+ * possible directly on a serialized set. See below.
+ *
+ * The struct holds a read-only pointer to the serialized 16-bit data,
+ * two lengths describing that data, and a small inline buffer of
+ * USET_SERIALIZED_STATIC_ARRAY_CAPACITY 16-bit units.
+ * Field order and types are part of the public layout; do not reorder.
+ * @stable ICU 2.4
+ */
+typedef struct USerializedSet {
+ /**
+ * The serialized Unicode Set. The data is not modified through this pointer.
+ * NOTE(review): presumably this may point at staticArray for small sets;
+ * confirm against the functions that fill in this struct.
+ * @stable ICU 2.4
+ */
+ const uint16_t *array;
+ /**
+ * The length of the part of the array that contains BMP characters.
+ * @stable ICU 2.4
+ */
+ int32_t bmpLength;
+ /**
+ * The total length of the array.
+ * @stable ICU 2.4
+ */
+ int32_t length;
+ /**
+ * A small inline buffer for the array, used to reduce memory allocations.
+ * Its capacity is USET_SERIALIZED_STATIC_ARRAY_CAPACITY elements.
+ * @stable ICU 2.4
+ */
+ uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
+} USerializedSet;
+
+/*********************************************************************
+ * USet API
+ *********************************************************************/
+
+/**
+ * Creates a USet object that contains the range of characters
+ * start..end, inclusive. If start > end
+ * then an empty set is created (same as using uset_openEmpty()).
+ * @param start first character of the range, inclusive
+ * @param end last character of the range, inclusive
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+ UErrorCode* ec);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode* ec);
+
+/**
+ * Disposes of the storage used by a USet object. This function should
+ * be called exactly once for objects returned by uset_open().
+ * @param set the object to dispose of
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_close(USet* set);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUSetPointer
+ * "Smart pointer" class, closes a USet via uset_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if true then convert unprintable
+ * characters to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+ UChar* result, int32_t resultCapacity,
+ UBool escapeUnprintable,
+ UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet. After this call,
+ * uset_contains(set, c) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the string
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet. After this call,
+ * uset_contains(set, c) will return false.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given string from the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return false.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the string
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * This is equivalent to
+ * uset_complementRange(set, 0, 0x10FFFF).
+ *
+ * Note: This performs a symmetric difference with all code points
+ * and thus retains all multicharacter strings.
+ * In order to achieve a “code point complement” (all code points minus this set),
+ * the easiest is to call uset_complement(set) followed by uset_removeAllStrings(set).
+ *
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Returns true if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * Returns true if the given USet contains the given character.
+ * This function works faster with a frozen set.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns true if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+/**
+ * Returns the number of characters and strings contained in this set.
+ * The last (uset_getItemCount() - uset_getRangeCount()) items are strings.
+ *
+ * This is slower than uset_getRangeCount() and uset_getItemCount() because
+ * it counts the code points of all ranges.
+ *
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ * @see uset_getRangeCount
+ */
+U_CAPI int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * @param set the set
+ * @return the number of ranges in this set.
+ * @stable ICU 70
+ * @see uset_getItemCount
+ * @see uset_getItem
+ * @see uset_size
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getRangeCount(const USet *set);
+
+/**
+ * Returns the number of items in this set. An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set. An item is either a range of
+ * characters or a single multicharacter string (which can be the empty string).
+ *
+ * If itemIndex is less than uset_getRangeCount(), then this function returns 0,
+ * and the range is *start..*end.
+ *
+ * If itemIndex is at least uset_getRangeCount() and less than uset_getItemCount(), then
+ * this function copies the string into str[strCapacity] and
+ * returns the length of the string (0 for the empty string).
+ *
+ * If itemIndex is out of range, then this function returns -1.
+ *
+ * Note that 0 is returned for each range as well as for the empty string.
+ *
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character in range, inclusive;
+ * can be NULL for a string item
+ * @param end pointer to variable to receive last character in range, inclusive;
+ * can be NULL for a string item
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range
+ * @return the length of the string (0 or >= 2), or 0 if the item is a range,
+ * or -1 if the itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+ UChar32* start, UChar32* end,
+ UChar* str, int32_t strCapacity,
+ UErrorCode* ec);
+#endif
diff --git a/third_party/icu4c/ndk_headers/unicode/utrans.h b/third_party/icu4c/ndk_headers/unicode/utrans.h
new file mode 100644
index 00000000000..bee7ffadbb0
--- /dev/null
+++ b/third_party/icu4c/ndk_headers/unicode/utrans.h
@@ -0,0 +1,576 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1997-2011,2014-2015 International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* Date Name Description
+* 06/21/00 aliu Creation.
+*******************************************************************************
+*/
+
+#ifndef UTRANS_H
+#define UTRANS_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_TRANSLITERATION
+
+#include "unicode/urep.h"
+#include "unicode/parseerr.h"
+#include "unicode/uenum.h"
+#include "unicode/uset.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/********************************************************************
+ * General Notes
+ ********************************************************************
+ */
+/**
+ * \file
+ * \brief C API: Transliterator
+ *
+ * Transliteration
+ * The data structures and functions described in this header provide
+ * transliteration services. Transliteration services are implemented
+ * as C++ classes. The comments and documentation in this header
+ * assume the reader is familiar with the C++ headers translit.h and
+ * associated documentation.
+ *
+ * A significant but incomplete subset of the C++ transliteration
+ * services are available to C code through this header. In order to
+ * access more complex transliteration services, refer to the C++
+ * headers and documentation.
+ *
+ * There are two sets of functions for working with transliterator IDs:
+ *
+ * An old, deprecated set uses char * IDs, which works for true and pure
+ * identifiers that these APIs were designed for,
+ * for example "Cyrillic-Latin".
+ * It does not work when the ID contains filters ("[:Script=Cyrl:]")
+ * or even a complete set of rules because then the ID string contains more
+ * than just "invariant" characters (see utypes.h).
+ *
+ * A new set of functions replaces the old ones and uses UChar * IDs,
+ * paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.)
+ */
+
+/********************************************************************
+ * Data Structures
+ ********************************************************************/
+
+/**
+ * An opaque transliterator for use in C. Open with utrans_openxxx()
+ * and close with utrans_close() when done. Equivalent to the C++ class
+ * Transliterator and its subclasses.
+ * @see Transliterator
+ * @stable ICU 2.0
+ */
+typedef void* UTransliterator;
+
+/**
+ * Direction constant indicating the direction in a transliterator,
+ * e.g., the forward or reverse rules of a RuleBasedTransliterator.
+ * Specified when a transliterator is opened. An "A-B" transliterator
+ * transliterates A to B when operating in the forward direction, and
+ * B to A when operating in the reverse direction.
+ * @stable ICU 2.0
+ */
+typedef enum UTransDirection {
+
+ /**
+ * UTRANS_FORWARD means from &lt;source&gt; to &lt;target&gt; for a
+ * transliterator with ID &lt;source&gt;-&lt;target&gt;. For a transliterator
+ * opened using a rule, it means forward direction rules, e.g.,
+ * "A > B".
+ */
+ UTRANS_FORWARD,
+
+ /**
+ * UTRANS_REVERSE means from &lt;target&gt; to &lt;source&gt; for a
+ * transliterator with ID &lt;source&gt;-&lt;target&gt;. For a transliterator
+ * opened using a rule, it means reverse direction rules, e.g.,
+ * "A < B".
+ */
+ UTRANS_REVERSE
+
+} UTransDirection;
+
+/**
+ * Position structure for utrans_transIncremental() incremental
+ * transliteration. This structure defines two substrings of the text
+ * being transliterated. The first region, [contextStart,
+ * contextLimit), defines what characters the transliterator will read
+ * as context. The second region, [start, limit), defines what
+ * characters will actually be transliterated. The second region
+ * should be a subset of the first.
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit pointer to the ending index, exclusive; <code>start <=
+ * limit <= repFunc->length(rep)</code>. Upon return, *limit will
+ * contain the new limit index. The text previously occupying
+ * <code>[start, limit)</code> has been transliterated, possibly to a
+ * string of a different length, at <code>[start,
+ * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
+ * is the return value.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+utrans_trans(const UTransliterator* trans,
+ UReplaceable* rep,
+ const UReplaceableCallbacks* repFunc,
+ int32_t start,
+ int32_t* limit,
+ UErrorCode* status);
+
+/**
+ * Transliterate the portion of the UReplaceable text buffer that can
+ * be transliterated unambiguously. This method is typically called
+ * after new text has been inserted, e.g. as a result of a keyboard
+ * event. The transliterator will try to transliterate characters of
+ * <code>rep</code> between <code>index.cursor</code> and
+ * <code>index.limit</code>. Characters before
+ * <code>index.cursor</code> will not be changed.
+ *
+ * <p>Upon return, values in <code>index</code> will be updated.
+ * <code>index.start</code> will be advanced to the first
+ * character that future calls to this method will read.
+ * <code>index.cursor</code> and <code>index.limit</code> will
+ * be adjusted to delimit the range of text that future calls to
+ * this method may change.
+ *
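+ * <p>Typical usage of this method begins with an initial call
+ * with <code>index.start</code> and <code>index.limit</code>
+ * set to indicate the portion of <code>text</code> to be
+ * transliterated, and <code>index.cursor == index.start</code>.
+ * Thereafter, <code>index</code> can be used without
+ * modification in future calls, provided that all changes to
+ * <code>text</code> are made via this method.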
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit pointer to the ending index, exclusive; <code>start <=
+ * limit <= repFunc->length(rep)</code>. Upon return, *limit will
+ * contain the new limit index. The text previously occupying
+ * <code>[start, limit)</code> has been transliterated, possibly to a
+ * string of a different length, at <code>[start,
+ * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
+ * is the return value.
+ * @param status a pointer to the UErrorCode
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+utrans_transUChars(const UTransliterator* trans,
+ UChar* text,
+ int32_t* textLength,
+ int32_t textCapacity,
+ int32_t start,
+ int32_t* limit,
+ UErrorCode* status);
+
+/**
+ * Transliterate the portion of the UChar* text buffer that can be
+ * transliterated unambiguously. See utrans_transIncremental(). The
+ * string is passed in in a UChar* buffer. The string is modified in
+ * place. If the result is longer than textCapacity, it is truncated.
+ * The actual length of the result is returned in *textLength, if
+ * textLength is non-NULL. *textLength may be greater than
+ * textCapacity, but only textCapacity UChars will be written to
+ * *text, including the zero terminator. See utrans_transIncremental()
+ * for usage details.
+ *
+ * @param trans the transliterator
+ * @param text a pointer to a buffer containing the text to be
+ * transliterated on input and the result text on output.
+ * @param textLength a pointer to the length of the string in text.
+ * If the length is -1 then the string is assumed to be
+ * zero-terminated. Upon return, the new length is stored in
+ * *textLength. If textLength is NULL then the string is assumed to
+ * be zero-terminated.
+ * @param textCapacity the length of the text buffer
+ * @param pos a struct containing the start and limit indices of the
+ * text to be read and the text to be transliterated
+ * @param status a pointer to the UErrorCode
+ * @see utrans_transIncremental
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+utrans_transIncrementalUChars(const UTransliterator* trans,
+ UChar* text,
+ int32_t* textLength,
+ int32_t textCapacity,
+ UTransPosition* pos,
+ UErrorCode* status);
+
+/**
+ * Create a rule string that can be passed to utrans_openU to recreate this
+ * transliterator.
+ *
+ * @param trans The transliterator
+ * @param escapeUnprintable if true then convert unprintable characters to their
+ * hex escape representations, \\uxxxx or \\Uxxxxxxxx.
+ * Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param result A pointer to a buffer to receive the rules.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to the UErrorCode. In case of error status, the
+ * contents of result are undefined.
+ * @return int32_t The length of the rule string (may be greater than resultLength,
+ * in which case an error is returned).
+ * @stable ICU 53
+ */
+U_CAPI int32_t U_EXPORT2
+utrans_toRules( const UTransliterator* trans,
+ UBool escapeUnprintable,
+ UChar* result, int32_t resultLength,
+ UErrorCode* status);
+
+/**
+ * Returns the set of all characters that may be modified in the input text by
+ * this UTransliterator, optionally ignoring the transliterator's current filter.
+ * @param trans The transliterator.
+ * @param ignoreFilter If false, the returned set incorporates the
+ * UTransliterator's current filter; if the filter is changed,
+ * the return value of this function will change. If true, the
+ * returned set ignores the effect of the UTransliterator's
+ * current filter.
+ * @param fillIn Pointer to a USet object to receive the modifiable characters
+ * set. Previous contents of fillIn are lost. If fillIn is
+ * NULL, then a new USet is created and returned. The caller
+ * owns the result and must dispose of it by calling uset_close.
+ * @param status A pointer to the UErrorCode.
+ * @return USet* Either fillIn, or if fillIn is NULL, a pointer to a
+ * newly-allocated USet that the user must close. In case of
+ * error, NULL is returned.
+ * @stable ICU 53
+ */
+U_CAPI USet* U_EXPORT2
+utrans_getSourceSet(const UTransliterator* trans,
+ UBool ignoreFilter,
+ USet* fillIn,
+ UErrorCode* status);
+
+/* deprecated API ----------------------------------------------------------- */
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/* see utrans.h documentation for why these functions are deprecated */
+
+
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+
+#endif
--
Gitee
From a8c04f74ee059f18eabe81adb34298841629d42c Mon Sep 17 00:00:00 2001
From: mengjingzhimo
- * @param start the beginning index, inclusive; <code>0 <= start <=
- * limit</code>.
- * @param limit pointer to the ending index, exclusive; <code>start <=
- * limit <= repFunc->length(rep)</code>. Upon return, *limit will
- * contain the new limit index. The text previously occupying
- * <code>[start, limit)</code> has been transliterated, possibly to a
- * string of a different length, at <code>[start,
- * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em>
- * is the return value.
- * @param status a pointer to the UErrorCode
- * @stable ICU 2.0
- */
-U_CAPI void U_EXPORT2
-utrans_trans(const UTransliterator* trans,
- UReplaceable* rep,
- const UReplaceableCallbacks* repFunc,
- int32_t start,
- int32_t* limit,
- UErrorCode* status);
-
-/**
- * Transliterate the portion of the UReplaceable text buffer that can
- * be transliterated unambiguously. This method is typically called
- * after new text has been inserted, e.g. as a result of a keyboard
- * event. The transliterator will try to transliterate characters of
- * <code>rep</code> between <code>index.cursor</code> and
- * <code>index.limit</code>. Characters before
- * <code>index.cursor</code> will not be changed.
- *
- * <p>Upon return, values in <code>index</code> will be updated.
- * <code>index.start</code> will be advanced to the first
- * character that future calls to this method will read.
- * <code>index.cursor</code> and <code>index.limit</code> will
- * be adjusted to delimit the range of text that future calls to
- * this method may change.
- *
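- * <p>Typical usage of this method begins with an initial call
- * with <code>index.start</code> and <code>index.limit</code>
- * set to indicate the portion of <code>text</code> to be
- * transliterated, and <code>index.cursor == index.start</code>.
- * Thereafter, <code>index</code> can be used without
- * modification in future calls, provided that all changes to
- * <code>text</code> are made via this method.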
- *
- *