icu46/source/common/uloc.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/uloc.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (C) 1997-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 **********************************************************************

	6 *

	7 * File ULOC.CPP

	8 *

	9 * Modification History:

	10 *

	11 * Date Name Description

	12 * 04/01/97 aliu Creation.

	13 * 08/21/98 stephen JDK 1.2 sync

	14 * 12/08/98 rtg New Locale implementation and C API

	15 * 03/15/99 damiba overhaul.

	16 * 04/06/99 stephen changed setDefault() to realloc and copy

	17 * 06/14/99 stephen Changed calls to ures_open for new params

	18 * 07/21/99 stephen Modified setDefault() to propagate to C++

	19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,

	20 * brought canonicalization code into line with spec

	21 *****************************************************************************/

	22

	23 /*

	24 POSIX's locale format, from putil.c: [no spaces]

	25

	26 ll [ _CC ] [ . MM ] [ @ VV]

	27

	28 l = lang, C = ctry, M = charmap, V = variant

	29 */

	30

	31 #include "unicode/utypes.h"

	32 #include "unicode/ustring.h"

	33 #include "unicode/uloc.h"

	34

	35 #include "putilimp.h"

	36 #include "ustr_imp.h"

	37 #include "ulocimp.h"

	38 #include "umutex.h"

	39 #include "cstring.h"

	40 #include "cmemory.h"

	41 #include "ucln_cmn.h"

	42 #include "locmap.h"

	43 #include "uarrsort.h"

	44 #include "uenumimp.h"

	45 #include "uassert.h"

	46

	47 #include <stdio.h> /* for sprintf */

	48

	49 /* ### Declarations **************************************************/

	50

	51 /* Locale stuff from locid.cpp */

	52 U_CFUNC void locale_set_default(const char *id);

	53 U_CFUNC const char *locale_get_default(void);

	54 U_CFUNC int32_t

	55 locale_getKeywords(const char *localeID,

	56 char prev,

	57 char *keywords, int32_t keywordCapacity,

	58 char values, int32_t valuesCapacity, int32_t valLen,

	59 UBool valuesToo,

	60 UErrorCode *status);

	61

	62 /* ### Data tables **************************************************/

	63

	64 /**

	65 * Table of language codes, both 2- and 3-letter, with preference

	66 * given to 2-letter codes where possible. Includes 3-letter codes

	67 * that lack a 2-letter equivalent.

	68 *

	69 * This list must be in sorted order. This list is returned directly

	70 * to the user by some API.

	71 *

	72 * This list must be kept in sync with LANGUAGES_3, with corresponding

	73 * entries matched.

	74 *

	75 * This table should be terminated with a NULL entry, followed by a

	76 * second list, and another NULL entry. The first list is visible to

	77 * user code when this array is returned by API. The second list

	78 * contains codes we support, but do not expose through user API.

	79 *

	80 * Notes

	81 *

	82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to

	83 * include the revisions up to 2001/7/27 CWB

	84 *

	85 * The 3 character codes are the terminology codes like RFC 3066. This

	86 * is compatible with prior ICU codes

	87 *

	88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the

	89 * table but now at the end of the table because 3 character codes are

	90 * duplicates. This avoids bad searches going from 3 to 2 character

	91 * codes.

	92 *

	93 * The range qaa-qtz is reserved for local use

	94 */

	95 static const char * const LANGUAGES[] = {

	96 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",

	97 "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an",

	98 "ang", "anp", "apa",

	99 "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",

	100 "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",

	101 "bai", "bal", "ban", "bas", "bat", "be", "bej",

	102 "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",

	103 "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",

	104 "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",

	105 "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",

	106 "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",

	107 "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",

	108 "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",

	109 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",

	110 "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",

	111 "enm", "eo", "es", "et", "eu", "ewo", "fa",

	112 "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon",

	113 "fr", "frm", "fro", "frr", "frs", "fur", "fy",

	114 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",

	115 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",

	116 "grc", "gsw", "gu", "gv", "gwi",

	117 "ha", "hai", "haw", "he", "hi", "hil", "him",

	118 "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",

	119 "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",

	120 "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",

	121 "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",

	122 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",

	123 "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",

	124 "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks",

	125 "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",

	126 "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",

	127 "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",

	128 "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",

	129 "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min",

	130 "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",

	131 "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",

	132 "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",

	133 "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",

	134 "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub",

	135 "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",

	136 "om", "or", "os", "osa", "ota", "oto", "pa", "paa",

	137 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",

	138 "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",

	139 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",

	140 "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam",

	141 "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem",

	142 "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",

	143 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",

	144 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",

	145 "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",

	146 "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter",

	147 "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",

	148 "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",

	149 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",

	150 "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",

	151 "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",

	152 "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",

	153 "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd",

	154 "zu", "zun", "zxx", "zza",

	155 NULL,

	156 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */

	157 NULL

	158 };

	/*
	 * Withdrawn 2-letter language codes.  Positions line up with
	 * REPLACEMENT_LANGUAGES (in/id, iw/he, ji/yi, jw/jv).  The list ends with
	 * two NULL entries.
	 */
	static const char * const DEPRECATED_LANGUAGES[] = {
	    "in",
	    "iw",
	    "ji",
	    "jw",
	    NULL,
	    NULL
	};

	/*
	 * Current codes for the withdrawn language codes, in the same order as
	 * DEPRECATED_LANGUAGES ("in", "iw", "ji", "jw").  The list ends with two
	 * NULL entries.
	 */
	static const char * const REPLACEMENT_LANGUAGES[] = {
	    "id",
	    "he",
	    "yi",
	    "jv",
	    NULL,
	    NULL
	};

	165

	166 /**

	167 * Table of 3-letter language codes.

	168 *

	169 * This is a lookup table used to convert 3-letter language codes to

	170 * their 2-letter equivalent, where possible. It must be kept in sync

	171 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the

	172 * same language as LANGUAGES_3[i]. The commented-out lines are

	173 * copied from LANGUAGES to make eyeballing this baby easier.

	174 *

	175 * Where a 3-letter language code has no 2-letter equivalent, the

	176 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].

	177 *

	178 * This table should be terminated with a NULL entry, followed by a

	179 * second list, and another NULL entry. The two lists correspond to

	180 * the two lists in LANGUAGES.

	181 */

	/*
	 * 3-letter language codes, index-aligned with LANGUAGES.  Every entry is a
	 * 3-letter code; the commented rows repeat the matching LANGUAGES entries.
	 */
	static const char * const LANGUAGES_3[] = {
	/*  "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */
	    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
	/*  "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an", "ang", "anp", "apa", */
	    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
	/*  "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */
	    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
	/*  "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */
	    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
	/*  "bai", "bal", "ban", "bas", "bat", "be", "bej", */
	    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
	/*  "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */
	    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
	/*  "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */
	    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
	/*  "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */
	    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
	/*  "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */
	    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
	/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */
	    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
	/*  "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */
	    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
	/*  "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */
	    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
	/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */
	    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
	/*  "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */
	    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
	/*  "enm", "eo", "es", "et", "eu", "ewo", "fa", */
	    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
	/*  "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon", */
	    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
	/*  "fr", "frm", "fro", "frr", "frs", "fur", "fy", "ga", "gaa", "gay", */
	    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
	/*  "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */
	    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
	/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu", "gv", */
	    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
	/*  "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */
	    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
	/*  "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */
	    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
	/*  "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */
	    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
	/*  "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */
	    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
	/*  "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */
	    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
	/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi", */
	    /* "kg" (Kongo) maps to ISO 639-2 "kon"; this slot must hold a 3-letter code. */
	    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
	/*  "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */
	    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
	/*  "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", */
	    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
	/*  "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */
	    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
	/*  "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */
	    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
	/*  "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */
	    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
	/*  "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */
	    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
	/*  "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min", */
	    "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
	/*  "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */
	    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
	/*  "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */
	    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
	/*  "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */
	    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
	/*  "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */
	    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
	/*  "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", */
	    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
	/*  "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */
	    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
	/*  "om", "or", "os", "osa", "ota", "oto", "pa", "paa", */
	    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
	/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */
	    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
	/*  "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */
	    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
	/*  "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */
	    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
	/*  "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */
	    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
	/*  "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem", */
	    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
	/*  "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */
	    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
	/*  "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */
	    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
	/*  "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */
	    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
	/*  "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */
	    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
	/*  "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter", */
	    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
	/*  "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */
	    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
	/*  "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", */
	    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
	/*  "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */
	    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
	/*  "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */
	    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
	/*  "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */
	    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
	/*  "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */
	    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
	/*  "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd", */
	    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
	/*  "zu", "zun", "zxx", "zza", */
	    "zul", "zun", "zxx", "zza",
	NULL,
	/*  "in", "iw", "ji", "jw", "sh", */
	    "ind", "heb", "yid", "jaw", "srp",
	NULL
	};

	302

	303 /**

	304 * Table of 2-letter country codes.

	305 *

	306 * This list must be in sorted order. This list is returned directly

	307 * to the user by some API.

	308 *

	309 * This list must be kept in sync with COUNTRIES_3, with corresponding

	310 * entries matched.

	311 *

	312 * This table should be terminated with a NULL entry, followed by a

	313 * second list, and another NULL entry. The first list is visible to

	314 * user code when this array is returned by API. The second list

	315 * contains codes we support, but do not expose through user API.

	316 *

	317 * Notes:

	318 *

	319 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per

	320 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added

	321 * new codes keeping the old ones for compatibility updated to include

	322 * 1999/12/03 revisions CWB

	323 *

	324 * RO(ROM) is now RO(ROU) according to

	325 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html

	326 */

	327 static const char * const COUNTRIES[] = {

	328 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",

	329 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",

	330 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",

	331 "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV",

	332 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",

	333 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",

	334 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",

	335 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",

	336 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",

	337 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",

	338 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",

	339 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",

	340 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",

	341 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",

	342 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",

	343 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",

	344 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",

	345 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",

	346 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",

	347 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",

	348 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",

	349 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",

	350 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",

	351 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",

	352 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",

	353 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",

	354 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",

	355 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",

	356 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",

	357 "WS", "YE", "YT", "ZA", "ZM", "ZW",

	358 NULL,

	359 "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */

	360 NULL

	361 };

	362

	/*
	 * Deprecated country list.  Positions line up with REPLACEMENT_COUNTRIES.
	 * The list ends with two NULL entries.
	 */
	static const char * const DEPRECATED_COUNTRIES[] = {
	    "BU", "CS", "DY", "FX", "HV",
	    "NH", "RH", "TP", "YU", "ZR",
	    NULL,
	    NULL
	};

	/*
	 * Replacement country codes, in the same order as DEPRECATED_COUNTRIES.
	 * The list ends with two NULL entries.
	 */
	static const char * const REPLACEMENT_COUNTRIES[] = {
	/*  "BU", "CS", "DY", "FX", "HV", */
	    "MM", "RS", "BJ", "FR", "BF",
	/*  "NH", "RH", "TP", "YU", "ZR" */
	    "VU", "ZW", "TL", "RS", "CD",
	    NULL,
	    NULL
	};

	370

	371 /**

	372 * Table of 3-letter country codes.

	373 *

	374 * This is a lookup table used to convert 3-letter country codes to

	375 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.

	376 * For all valid i, COUNTRIES[i] must refer to the same country as

	377 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES

	378 * to make eyeballing this baby easier.

	379 *

	380 * This table should be terminated with a NULL entry, followed by a

	381 * second list, and another NULL entry. The two lists correspond to

	382 * the two lists in COUNTRIES.

	383 */

	384 static const char * const COUNTRIES_3[] = {

	385 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */

	386 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",

	387 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */

	388 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",

	389 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */

	390 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",

	391 /* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */

	392 "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",

	393 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */

	394 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",

	395 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */

	396 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",

	397 /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */

	398 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",

	399 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */

	400 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",

	401 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */

	402 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",

	403 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */

	404 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",

	405 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */

	406 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",

	407 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */

	408 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",

	409 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */

	410 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",

	411 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */

	412 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",

	413 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */

	414 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",

	415 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */

	416 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",

	417 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */

	418 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",

	419 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */

	420 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",

	421 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */

	422 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",

	423 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */

	424 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",

	425 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */

	426 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",

	427 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */

	428 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",

	429 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */

	430 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",

	431 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */

	432 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",

	433 /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */

	434 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",

	435 /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */

	436 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",

	437 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */

	438 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",

	439 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */

	440 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",

	441 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */

	442 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",

	443 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */

	444 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",

	445 NULL,

	446 /* "FX", "CS", "RO", "TP", "YU", "ZR", */

	447 "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",

	448 NULL

	449 };

	450

	/*
	 * One row of the locale-ID canonicalization table.  All members are C
	 * strings (the table initialises them with string literals or NULL), so
	 * each is a pointer to const char.
	 */
	typedef struct CanonicalizationMap {
	    const char *id;          /* input ID */
	    const char *canonicalID; /* canonicalized output ID */
	    const char *keyword;     /* keyword, or NULL if none */
	    const char *value;       /* keyword value, or NULL if kw==NULL */
	} CanonicalizationMap;

	457

	458 /**

	459 * A map to canonicalize locale IDs. This handles a variety of

	460 * different semantic kinds of transformations.

	461 */

	462 static const CanonicalizationMap CANONICALIZE_MAP[] = {

	463 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */

	464 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */

	465 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */

	466 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */

	467 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */

	468 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */

	469 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },

	470 { "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */

	471 { "de_1901", "de__1901", NULL, NULL }, /* registered name */

	472 { "de_1906", "de__1906", NULL, NULL }, /* registered name */

	473 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */

	474 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },

	475 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },

	476 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },

	477 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },

	478 { "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */

	479 { "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */

	480 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },

	481 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },

	482 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */

	483 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },

	484 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },

	485 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },

	486 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },

	487 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },

	488 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },

	489 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },

	490 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },

	491 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */

	492 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },

	493 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */

	494 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */

	495 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },

	496 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },

	497 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },

	498 { "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */

	499 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */

	500 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */

	501 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */

	502 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */

	503 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */

	504 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */

	505 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */

	506 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */

	507 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */

	508 { "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */

	509 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */

	510 { "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */

	511 { "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */

	512 { "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */

	513 { "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */

	514 { "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */

	515 { "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */

	516 };

	517

	/*
	 * One row of the variant table.  All members are C strings (the table
	 * initialises them with string literals), so each is a pointer to const
	 * char.
	 */
	typedef struct VariantMap {
	    const char *variant; /* input ID */
	    const char *keyword; /* keyword, or NULL if none */
	    const char *value;   /* keyword value, or NULL if kw==NULL */
	} VariantMap;

	static const VariantMap VARIANT_MAP[] = {
	    { "EURO",   "currency",  "EUR" },
	    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
	    { "STROKE", "collation", "stroke" }  /* Solaris variant */
	};

	529

	530 /* ### BCP47 Conversion *******************************************/

	/*
	 * Test if the locale id has BCP47 u extension and does not have '@'.
	 * Evaluates to true when id is non-NULL, contains no '@', and
	 * getShortestSubtagLength(id) is 1.  The macro argument is used throughout
	 * (not a variable that happens to be called localeID at the call site),
	 * and id is evaluated more than once, so pass a side-effect-free expression.
	 */
	#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength((id)) == 1)
	/*
	 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails:
	 * finalID is set to buffer when uloc_forLanguageTag() returns a positive
	 * length without error, and to id otherwise.
	 * Expands to a bare if/else statement, so use it only as a complete
	 * statement inside its own braces.
	 */
	#define _ConvertBCP47(finalID, id, buffer, length,err) \
	    if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
	        finalID=id; \
	    } else { \
	        finalID=buffer; \
	    }

	/*
	 * Gets the size of the shortest subtag in the given localeID.
	 *
	 * Subtags are separated by '_' or '-'.  Only non-empty subtags that are
	 * followed by a separator are measured; the text after the last separator
	 * is never compared.  When no such subtag exists the length of the whole
	 * string is returned.
	 */
	static int32_t getShortestSubtagLength(const char *localeID) {
	    int32_t shortest = -1; /* shortest measured subtag, -1 while none seen */
	    int32_t run = 0;       /* length of the subtag being scanned */
	    int32_t total = 0;     /* number of characters consumed so far */
	    const char *p;

	    for (p = localeID; *p != 0; ++p, ++total) {
	        if (*p == '_' || *p == '-') {
	            /* A separator closes the current subtag; empty ones are ignored. */
	            if (run != 0 && (shortest < 0 || run < shortest)) {
	                shortest = run;
	            }
	            run = 0;
	        } else {
	            ++run;
	        }
	    }

	    return (shortest < 0) ? total : shortest;
	}

	565

	566 /* ### Keywords **************************************************/

	567

	/*
	 * Size of KeywordStruct.keyword; locale_canonKeywordName() rejects keyword
	 * names whose length is this value or more.
	 */
	#define ULOC_KEYWORD_BUFFER_LEN 25
	/*
	 * Size of the keywordList array in _getKeywords(); reaching this many
	 * keywords makes it fail with U_INTERNAL_PROGRAM_ERROR.
	 */
	#define ULOC_MAX_NO_KEYWORDS 25

	570

	571 U_CAPI const char * U_EXPORT2

	572 locale_getKeywordsStart(const char *localeID) {

	573 const char *result = NULL;

	574 if((result = uprv_strchr(localeID, '@')) != NULL) {

	575 return result;

	576 }

	577 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)

	578 else {

	579 /* We do this because the @ sign is variant, and the @ sign used on one

	580 EBCDIC machine won't be compiled the same way on other EBCDIC based

	581 machines. */

	582 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xA E, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };

	583 const uint8_t *charToFind = ebcdicSigns;

	584 while(*charToFind) {

	585 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {

	586 return result;

	587 }

	588 charToFind++;

	589 }

	590 }

	591 #endif

	592 return NULL;

	593 }

	594

	595 /**

	596 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]

	597 * @param keywordName incoming name to be canonicalized

	598 * @param status return status (keyword too long)

	599 * @return length of the keyword name

	600 */

	601 static int32_t locale_canonKeywordName(char buf, const char keywordName, UErro rCode *status)

	602 {

	603 int32_t i;

	604 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);

	605

	606 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {

	607 /* keyword name too long for internal buffer */

	608 *status = U_INTERNAL_PROGRAM_ERROR;

	609 return 0;

	610 }

	611

	612 /* normalize the keyword name */

	613 for(i = 0; i < keywordNameLen; i++) {

	614 buf[i] = uprv_tolower(keywordName[i]);

	615 }

	616 buf[i] = 0;

	617

	618 return keywordNameLen;

	619 }

	620

	621 typedef struct {

	622 char keyword[ULOC_KEYWORD_BUFFER_LEN];

	623 int32_t keywordLen;

	624 const char *valueStart;

	625 int32_t valueLen;

	626 } KeywordStruct;

	627

	628 static int32_t U_CALLCONV

	629 compareKeywordStructs(const void context, const void left, const void *right) {

	630 const char* leftString = ((const KeywordStruct *)left)->keyword;

	631 const char* rightString = ((const KeywordStruct *)right)->keyword;

	632 return uprv_strcmp(leftString, rightString);

	633 }

	634

	635 /**

	636 * Both addKeyword and addValue must already be in canonical form.

	637 * Either both addKeyword and addValue are NULL, or neither is NULL.

	638 * If they are not NULL they must be zero terminated.

	639 * If addKeyword is not NULL is must have length small enough to fit in KeywordS truct.keyword.

	640 */

	641 static int32_t

	642 _getKeywords(const char *localeID,

	643 char prev,

	644 char *keywords, int32_t keywordCapacity,

	645 char values, int32_t valuesCapacity, int32_t valLen,

	646 UBool valuesToo,

	647 const char* addKeyword,

	648 const char* addValue,

	649 UErrorCode *status)

	650 {

	651 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];

	652

	653 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;

	654 int32_t numKeywords = 0;

	655 const char* pos = localeID;

	656 const char* equalSign = NULL;

	657 const char* semicolon = NULL;

	658 int32_t i = 0, j, n;

	659 int32_t keywordsLen = 0;

	660 int32_t valuesLen = 0;

	661

	662 if(prev == '@') { /* start of keyword definition */

	663 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */

	664 do {

	665 UBool duplicate = FALSE;

	666 /* skip leading spaces */

	667 while(*pos == ' ') {

	668 pos++;

	669 }

	670 if (!pos) { / handle trailing "; " */

	671 break;

	672 }

	673 if(numKeywords == maxKeywords) {

	674 *status = U_INTERNAL_PROGRAM_ERROR;

	675 return 0;

	676 }

	677 equalSign = uprv_strchr(pos, '=');

	678 semicolon = uprv_strchr(pos, ';');

	679 /* lack of '=' [foo@currency] is illegal */

	680 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */

	681 if(!equalSign \|\| (semicolon && semicolon<equalSign)) {

	682 *status = U_INVALID_FORMAT_ERROR;

	683 return 0;

	684 }

	685 /* need to normalize both keyword and keyword name */

	686 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {

	687 /* keyword name too long for internal buffer */

	688 *status = U_INTERNAL_PROGRAM_ERROR;

	689 return 0;

	690 }

	691 for(i = 0, n = 0; i < equalSign - pos; ++i) {

	692 if (pos[i] != ' ') {

	693 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]) ;

	694 }

	695 }

	696 keywordList[numKeywords].keyword[n] = 0;

	697 keywordList[numKeywords].keywordLen = n;

	698 /* now grab the value part. First we skip the '=' */

	699 equalSign++;

	700 /* then we leading spaces */

	701 while(*equalSign == ' ') {

	702 equalSign++;

	703 }

	704 keywordList[numKeywords].valueStart = equalSign;

	705

	706 pos = semicolon;

	707 i = 0;

	708 if(pos) {

	709 while(*(pos - i - 1) == ' ') {

	710 i++;

	711 }

	712 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);

	713 pos++;

	714 } else {

	715 i = (int32_t)uprv_strlen(equalSign);

	716 while(equalSign[i-1] == ' ') {

	717 i--;

	718 }

	719 keywordList[numKeywords].valueLen = i;

	720 }

	721 /* If this is a duplicate keyword, then ignore it */

	722 for (j=0; j<numKeywords; ++j) {

	723 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords] .keyword) == 0) {

	724 duplicate = TRUE;

	725 break;

	726 }

	727 }

	728 if (!duplicate) {

	729 ++numKeywords;

	730 }

	731 } while(pos);

	732

	733 /* Handle addKeyword/addValue. */

	734 if (addKeyword != NULL) {

	735 UBool duplicate = FALSE;

	736 U_ASSERT(addValue != NULL);

	737 /* Search for duplicate; if found, do nothing. Explicit keyword

	738 overrides addKeyword. */

	739 for (j=0; j<numKeywords; ++j) {

	740 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {

	741 duplicate = TRUE;

	742 break;

	743 }

	744 }

	745 if (!duplicate) {

	746 if (numKeywords == maxKeywords) {

	747 *status = U_INTERNAL_PROGRAM_ERROR;

	748 return 0;

	749 }

	750 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);

	751 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKe yword);

	752 keywordList[numKeywords].valueStart = addValue;

	753 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValu e);

	754 ++numKeywords;

	755 }

	756 } else {

	757 U_ASSERT(addValue == NULL);

	758 }

	759

	760 /* now we have a list of keywords */

	761 /* we need to sort it */

	762 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareK eywordStructs, NULL, FALSE, status);

	763

	764 /* Now construct the keyword part */

	765 for(i = 0; i < numKeywords; i++) {

	766 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {

	767 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);

	768 if(valuesToo) {

	769 keywords[keywordsLen + keywordList[i].keywordLen] = '=';

	770 } else {

	771 keywords[keywordsLen + keywordList[i].keywordLen] = 0;

	772 }

	773 }

	774 keywordsLen += keywordList[i].keywordLen + 1;

	775 if(valuesToo) {

	776 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {

	777 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart , keywordList[i].valueLen);

	778 }

	779 keywordsLen += keywordList[i].valueLen;

	780

	781 if(i < numKeywords - 1) {

	782 if(keywordsLen < keywordCapacity) {

	783 keywords[keywordsLen] = ';';

	784 }

	785 keywordsLen++;

	786 }

	787 }

	788 if(values) {

	789 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {

	790 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);

	791 values[valuesLen + keywordList[i].valueLen] = 0;

	792 }

	793 valuesLen += keywordList[i].valueLen + 1;

	794 }

	795 }

	796 if(values) {

	797 values[valuesLen] = 0;

	798 if(valLen) {

	799 *valLen = valuesLen;

	800 }

	801 }

	802 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);

	803 } else {

	804 return 0;

	805 }

	806 }

	807

	808 U_CFUNC int32_t

	809 locale_getKeywords(const char *localeID,

	810 char prev,

	811 char *keywords, int32_t keywordCapacity,

	812 char values, int32_t valuesCapacity, int32_t valLen,

	813 UBool valuesToo,

	814 UErrorCode *status) {

	815 return _getKeywords(localeID, prev, keywords, keywordCapacity,

	816 values, valuesCapacity, valLen, valuesToo,

	817 NULL, NULL, status);

	818 }

	819

	820 U_CAPI int32_t U_EXPORT2

	821 uloc_getKeywordValue(const char* localeID,

	822 const char* keywordName,

	823 char* buffer, int32_t bufferCapacity,

	824 UErrorCode* status)

	825 {

	826 const char* startSearchHere = NULL;

	827 const char* nextSeparator = NULL;

	828 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

	829 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

	830 int32_t i = 0;

	831 int32_t result = 0;

	832

	833 if(status && U_SUCCESS(*status) && localeID) {

	834 char tempBuffer[ULOC_FULLNAME_CAPACITY];

	835 const char* tmpLocaleID;

	836

	837 if (_hasBCP47Extension(localeID)) {

	838 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), s tatus);

	839 } else {

	840 tmpLocaleID=localeID;

	841 }

	842

	843 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn 't this be locale_getKeywordsStart ? */

	844 if(startSearchHere == NULL) {

	845 /* no keywords, return at once */

	846 return 0;

	847 }

	848

	849 locale_canonKeywordName(keywordNameBuffer, keywordName, status);

	850 if(U_FAILURE(*status)) {

	851 return 0;

	852 }

	853

	854 /* find the first keyword */

	855 while(startSearchHere) {

	856 startSearchHere++;

	857 /* skip leading spaces (allowed?) */

	858 while(*startSearchHere == ' ') {

	859 startSearchHere++;

	860 }

	861 nextSeparator = uprv_strchr(startSearchHere, '=');

	862 /* need to normalize both keyword and keyword name */

	863 if(!nextSeparator) {

	864 break;

	865 }

	866 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {

	867 /* keyword name too long for internal buffer */

	868 *status = U_INTERNAL_PROGRAM_ERROR;

	869 return 0;

	870 }

	871 for(i = 0; i < nextSeparator - startSearchHere; i++) {

	872 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);

	873 }

	874 /* trim trailing spaces */

	875 while(startSearchHere[i-1] == ' ') {

	876 i--;

	877 }

	878 localeKeywordNameBuffer[i] = 0;

	879

	880 startSearchHere = uprv_strchr(nextSeparator, ';');

	881

	882 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {

	883 nextSeparator++;

	884 while(*nextSeparator == ' ') {

	885 nextSeparator++;

	886 }

	887 /* we actually found the keyword. Copy the value */

	888 if(startSearchHere && startSearchHere - nextSeparator < bufferCapa city) {

	889 while(*(startSearchHere-1) == ' ') {

	890 startSearchHere--;

	891 }

	892 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSepa rator);

	893 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(st artSearchHere - nextSeparator), status);

	894 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */

	895 i = (int32_t)uprv_strlen(nextSeparator);

	896 while(nextSeparator[i - 1] == ' ') {

	897 i--;

	898 }

	899 uprv_strncpy(buffer, nextSeparator, i);

	900 result = u_terminateChars(buffer, bufferCapacity, i, status);

	901 } else {

	902 /* give a bigger buffer, please */

	903 *status = U_BUFFER_OVERFLOW_ERROR;

	904 if(startSearchHere) {

	905 result = (int32_t)(startSearchHere - nextSeparator);

	906 } else {

	907 result = (int32_t)uprv_strlen(nextSeparator);

	908 }

	909 }

	910 return result;

	911 }

	912 }

	913 }

	914 return 0;

	915 }

	916

	917 U_CAPI int32_t U_EXPORT2

	918 uloc_setKeywordValue(const char* keywordName,

	919 const char* keywordValue,

	920 char* buffer, int32_t bufferCapacity,

	921 UErrorCode* status)

	922 {

	923 /* TODO: sorting. removal. */

	924 int32_t keywordNameLen;

	925 int32_t keywordValueLen;

	926 int32_t bufLen;

	927 int32_t needLen = 0;

	928 int32_t foundValueLen;

	929 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */

	930 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

	931 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

	932 int32_t i = 0;

	933 int32_t rc;

	934 char* nextSeparator = NULL;

	935 char* nextEqualsign = NULL;

	936 char* startSearchHere = NULL;

	937 char* keywordStart = NULL;

	938 char *insertHere = NULL;

	939 if(U_FAILURE(*status)) {

	940 return -1;

	941 }

	942 if(bufferCapacity>1) {

	943 bufLen = (int32_t)uprv_strlen(buffer);

	944 } else {

	945 *status = U_ILLEGAL_ARGUMENT_ERROR;

	946 return 0;

	947 }

	948 if(bufferCapacity<bufLen) {

	949 /* The capacity is less than the length?! Is this NULL terminated? */

	950 *status = U_ILLEGAL_ARGUMENT_ERROR;

	951 return 0;

	952 }

	953 if(keywordValue && !*keywordValue) {

	954 keywordValue = NULL;

	955 }

	956 if(keywordValue) {

	957 keywordValueLen = (int32_t)uprv_strlen(keywordValue);

	958 } else {

	959 keywordValueLen = 0;

	960 }

	961 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, sta tus);

	962 if(U_FAILURE(*status)) {

	963 return 0;

	964 }

	965 startSearchHere = (char*)locale_getKeywordsStart(buffer);

	966 if(startSearchHere == NULL \|\| (startSearchHere[1]==0)) {

	967 if(!keywordValue) { /* no keywords = nothing to remove */

	968 return bufLen;

	969 }

	970

	971 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;

	972 if(startSearchHere) { /* had a single @ */

	973 needLen--; /* already had the @ */

	974 /* startSearchHere points at the @ */

	975 } else {

	976 startSearchHere=buffer+bufLen;

	977 }

	978 if(needLen >= bufferCapacity) {

	979 *status = U_BUFFER_OVERFLOW_ERROR;

	980 return needLen; /* no change */

	981 }

	982 *startSearchHere = '@';

	983 startSearchHere++;

	984 uprv_strcpy(startSearchHere, keywordNameBuffer);

	985 startSearchHere += keywordNameLen;

	986 *startSearchHere = '=';

	987 startSearchHere++;

	988 uprv_strcpy(startSearchHere, keywordValue);

	989 startSearchHere+=keywordValueLen;

	990 return needLen;

	991 } /* end shortcut - no @ */

	992

	993 keywordStart = startSearchHere;

	994 /* search for keyword */

	995 while(keywordStart) {

	996 keywordStart++;

	997 /* skip leading spaces (allowed?) */

	998 while(*keywordStart == ' ') {

	999 keywordStart++;

	1000 }

	1001 nextEqualsign = uprv_strchr(keywordStart, '=');

	1002 /* need to normalize both keyword and keyword name */

	1003 if(!nextEqualsign) {

	1004 break;

	1005 }

	1006 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {

	1007 /* keyword name too long for internal buffer */

	1008 *status = U_INTERNAL_PROGRAM_ERROR;

	1009 return 0;

	1010 }

	1011 for(i = 0; i < nextEqualsign - keywordStart; i++) {

	1012 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);

	1013 }

	1014 /* trim trailing spaces */

	1015 while(keywordStart[i-1] == ' ') {

	1016 i--;

	1017 }

	1018 localeKeywordNameBuffer[i] = 0;

	1019

	1020 nextSeparator = uprv_strchr(nextEqualsign, ';');

	1021 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);

	1022 if(rc == 0) {

	1023 nextEqualsign++;

	1024 while(*nextEqualsign == ' ') {

	1025 nextEqualsign++;

	1026 }

	1027 /* we actually found the keyword. Change the value */

	1028 if (nextSeparator) {

	1029 keywordAtEnd = 0;

	1030 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);

	1031 } else {

	1032 keywordAtEnd = 1;

	1033 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);

	1034 }

	1035 if(keywordValue) { /* adding a value - not removing */

	1036 if(foundValueLen == keywordValueLen) {

	1037 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);

	1038 return bufLen; /* no change in size */

	1039 } else if(foundValueLen > keywordValueLen) {

	1040 int32_t delta = foundValueLen - keywordValueLen;

	1041 if(nextSeparator) { /* RH side */

	1042 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nex tSeparator-buffer));

	1043 }

	1044 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);

	1045 bufLen -= delta;

	1046 buffer[bufLen]=0;

	1047 return bufLen;

	1048 } else { /* FVL < KVL */

	1049 int32_t delta = keywordValueLen - foundValueLen;

	1050 if((bufLen+delta) >= bufferCapacity) {

	1051 *status = U_BUFFER_OVERFLOW_ERROR;

	1052 return bufLen+delta;

	1053 }

	1054 if(nextSeparator) { /* RH side */

	1055 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSe parator-buffer));

	1056 }

	1057 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);

	1058 bufLen += delta;

	1059 buffer[bufLen]=0;

	1060 return bufLen;

	1061 }

	1062 } else { /* removing a keyword */

	1063 if(keywordAtEnd) {

	1064 /* zero out the ';' or '@' just before startSearchhere */

	1065 keywordStart[-1] = 0;

	1066 return (int32_t)((keywordStart-buffer)-1); /* (string length wit hout keyword) minus separator */

	1067 } else {

	1068 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparat or+1)-buffer));

	1069 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;

	1070 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));

	1071 }

	1072 }

	1073 } else if(rc<0){ /* end match keyword */

	1074 /* could insert at this location. */

	1075 insertHere = keywordStart;

	1076 }

	1077 keywordStart = nextSeparator;

	1078 } /* end loop searching */

	1079

	1080 if(!keywordValue) {

	1081 return bufLen; /* removal of non-extant keyword - no change */

	1082 }

	1083

	1084 /* we know there is at least one keyword. */

	1085 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;

	1086 if(needLen >= bufferCapacity) {

	1087 *status = U_BUFFER_OVERFLOW_ERROR;

	1088 return needLen; /* no change */

	1089 }

	1090

	1091 if(insertHere) {

	1092 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));

	1093 keywordStart = insertHere;

	1094 } else {

	1095 keywordStart = buffer+bufLen;

	1096 *keywordStart = ';';

	1097 keywordStart++;

	1098 }

	1099 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);

	1100 keywordStart += keywordNameLen;

	1101 *keywordStart = '=';

	1102 keywordStart++;

	1103 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */

	1104 keywordStart+=keywordValueLen;

	1105 if(insertHere) {

	1106 *keywordStart = ';';

	1107 keywordStart++;

	1108 }

	1109 buffer[needLen]=0;

	1110 return needLen;

	1111 }

	1112

	1113 /* ### ID parsing implementation ********************************************* */

	1114

	/* TRUE for the letters that can start a special 'x-' / 'i-' prefix */
	#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

	/*returns TRUE if one of the special prefixes is here (s=string)
	  'x-' or 'i-' */
	#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

	/* Dot terminates it because of POSIX form  where dot precedes the codepage
	 * except for variant
	 */
	#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))

	1125

	/**
	 * Bounded character search: look for c in at most the first len characters
	 * of str, stopping early at a NUL.
	 *
	 * @return pointer to the first match (constness of str is cast away), or
	 *         NULL if c was not found before the NUL or within len characters
	 */
	static char* _strnchr(const char* str, int32_t len, char c) {
	    U_ASSERT(str != 0 && len >= 0);
	    for (; len != 0; --len, ++str) {
	        char d = *str;
	        if (d == c) {
	            return (char*) str;
	        }
	        if (d == 0) {
	            break;
	        }
	    }
	    return NULL;
	}

	1139

	/**
	 * Lookup 'key' in the array 'list'.  The array 'list' should contain
	 * a NULL entry, followed by more entries, and a second NULL entry.
	 *
	 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
	 * COUNTRIES_3.
	 *
	 * @return index of the matching entry counted from the start of 'list'
	 *         (the first NULL occupies one slot), or -1 if key is in neither
	 *         section
	 */
	static int16_t _findIndex(const char* const* list, const char* key)
	{
	    const char* const* anchor = list;
	    int32_t pass = 0;

	    /* Make two passes through two NULL-terminated arrays at 'list' */
	    while (pass++ < 2) {
	        while (*list) {
	            if (uprv_strcmp(key, *list) == 0) {
	                return (int16_t)(list - anchor);
	            }
	            list++;
	        }
	        ++list;     /* skip final NULL *CWB*/
	    }
	    return -1;
	}

	1164

	1165 /* count the length of src while copying it to dest; return strlen(src) */

	1166 static U_INLINE int32_t

	1167 _copyCount(char dest, int32_t destCapacity, const char src) {

	1168 const char *anchor;

	1169 char c;

	1170

	1171 anchor=src;

	1172 for(;;) {

	1173 if((c=*src)==0) {

	1174 return (int32_t)(src-anchor);

	1175 }

	1176 if(destCapacity<=0) {

	1177 return (int32_t)((src-anchor)+uprv_strlen(src));

	1178 }

	1179 ++src;

	1180 *dest++=c;

	1181 --destCapacity;

	1182 }

	1183 }

	1184

	1185 U_CFUNC const char*

	1186 uloc_getCurrentCountryID(const char* oldID){

	1187 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);

	1188 if (offset >= 0) {

	1189 return REPLACEMENT_COUNTRIES[offset];

	1190 }

	1191 return oldID;

	1192 }

	1193 U_CFUNC const char*

	1194 uloc_getCurrentLanguageID(const char* oldID){

	1195 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);

	1196 if (offset >= 0) {

	1197 return REPLACEMENT_LANGUAGES[offset];

	1198 }

	1199 return oldID;

	1200 }

	1201 /*

	1202 * the internal functions _getLanguage(), _getCountry(), _getVariant()

	1203 * avoid duplicating code to handle the earlier locale ID pieces

	1204 * in the functions for the later ones by

	1205 * setting the *pEnd pointer to where they stopped parsing

	1206 *

	1207 * TODO try to use this in Locale

	1208 */

	1209 U_CFUNC int32_t

	1210 ulocimp_getLanguage(const char *localeID,

	1211 char *language, int32_t languageCapacity,

	1212 const char **pEnd) {

	1213 int32_t i=0;

	1214 int32_t offset;

	1215 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for s earching */

	1216

	1217 /* if it starts with i- or x- then copy that prefix */

	1218 if(_isIDPrefix(localeID)) {

	1219 if(i<languageCapacity) {

	1220 language[i]=(char)uprv_tolower(*localeID);

	1221 }

	1222 if(i<languageCapacity) {

	1223 language[i+1]='-';

	1224 }

	1225 i+=2;

	1226 localeID+=2;

	1227 }

	1228

	1229 /* copy the language as far as possible and count its length */

	1230 while(!_isTerminator(localeID) && !_isIDSeparator(localeID)) {

	1231 if(i<languageCapacity) {

	1232 language[i]=(char)uprv_tolower(*localeID);

	1233 }

	1234 if(i<3) {

	1235 lang[i]=(char)uprv_tolower(*localeID);

	1236 }

	1237 i++;

	1238 localeID++;

	1239 }

	1240

	1241 if(i==3) {

	1242 /* convert 3 character code to 2 character code if possible CWB/

	1243 offset=_findIndex(LANGUAGES_3, lang);

	1244 if(offset>=0) {

	1245 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);

	1246 }

	1247 }

	1248

	1249 if(pEnd!=NULL) {

	1250 *pEnd=localeID;

	1251 }

	1252 return i;

	1253 }

	1254

	1255 U_CFUNC int32_t

	1256 ulocimp_getScript(const char *localeID,

	1257 char *script, int32_t scriptCapacity,

	1258 const char **pEnd)

	1259 {

	1260 int32_t idLen = 0;

	1261

	1262 if (pEnd != NULL) {

	1263 *pEnd = localeID;

	1264 }

	1265

	1266 /* copy the second item as far as possible and count its length */

	1267 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {

	1268 idLen++;

	1269 }

	1270

	1271 /* If it's exactly 4 characters long, then it's a script and not a country. */

	1272 if (idLen == 4) {

	1273 int32_t i;

	1274 if (pEnd != NULL) {

	1275 *pEnd = localeID+idLen;

	1276 }

	1277 if(idLen > scriptCapacity) {

	1278 idLen = scriptCapacity;

	1279 }

	1280 if (idLen >= 1) {

	1281 script[0]=(char)uprv_toupper(*(localeID++));

	1282 }

	1283 for (i = 1; i < idLen; i++) {

	1284 script[i]=(char)uprv_tolower(*(localeID++));

	1285 }

	1286 }

	1287 else {

	1288 idLen = 0;

	1289 }

	1290 return idLen;

	1291 }

	1292

	1293 U_CFUNC int32_t

	1294 ulocimp_getCountry(const char *localeID,

	1295 char *country, int32_t countryCapacity,

	1296 const char **pEnd)

	1297 {

	1298 int32_t idLen=0;

	1299 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };

	1300 int32_t offset;

	1301

	1302 /* copy the country as far as possible and count its length */

	1303 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {

	1304 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /CWB/

	1305 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);

	1306 }

	1307 idLen++;

	1308 }

	1309

	1310 /* the country should be either length 2 or 3 */

	1311 if (idLen == 2 \|\| idLen == 3) {

	1312 UBool gotCountry = FALSE;

	1313 /* convert 3 character code to 2 character code if possible CWB/

	1314 if(idLen==3) {

	1315 offset=_findIndex(COUNTRIES_3, cnty);

	1316 if(offset>=0) {

	1317 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);

	1318 gotCountry = TRUE;

	1319 }

	1320 }

	1321 if (!gotCountry) {

	1322 int32_t i = 0;

	1323 for (i = 0; i < idLen; i++) {

	1324 if (i < countryCapacity) {

	1325 country[i]=(char)uprv_toupper(localeID[i]);

	1326 }

	1327 }

	1328 }

	1329 localeID+=idLen;

	1330 } else {

	1331 idLen = 0;

	1332 }

	1333

	1334 if(pEnd!=NULL) {

	1335 *pEnd=localeID;

	1336 }

	1337

	1338 return idLen;

	1339 }

	1340

	1341 /**

	1342 * @param needSeparator if true, then add leading '_' if any variants

	1343 * are added to 'variant'

	1344 */

	1345 static int32_t

	1346 _getVariantEx(const char *localeID,

	1347 char prev,

	1348 char *variant, int32_t variantCapacity,

	1349 UBool needSeparator) {

	1350 int32_t i=0;

	1351

	1352 /* get one or more variant tags and separate them with '_' */

	1353 if(_isIDSeparator(prev)) {

	1354 /* get a variant string after a '-' or '_' */

	1355 while(!_isTerminator(*localeID)) {

	1356 if (needSeparator) {

	1357 if (i<variantCapacity) {

	1358 variant[i] = '_';

	1359 }

	1360 ++i;

	1361 needSeparator = FALSE;

	1362 }

	1363 if(i<variantCapacity) {

	1364 variant[i]=(char)uprv_toupper(*localeID);

	1365 if(variant[i]=='-') {

	1366 variant[i]='_';

	1367 }

	1368 }

	1369 i++;

	1370 localeID++;

	1371 }

	1372 }

	1373

	1374 /* if there is no variant tag after a '-' or '_' then look for '@' */

	1375 if(i==0) {

	1376 if(prev=='@') {

	1377 /* keep localeID */

	1378 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {

	1379 ++localeID; /* point after the '@' */

	1380 } else {

	1381 return 0;

	1382 }

	1383 while(!_isTerminator(*localeID)) {

	1384 if (needSeparator) {

	1385 if (i<variantCapacity) {

	1386 variant[i] = '_';

	1387 }

	1388 ++i;

	1389 needSeparator = FALSE;

	1390 }

	1391 if(i<variantCapacity) {

	1392 variant[i]=(char)uprv_toupper(*localeID);

	1393 if(variant[i]=='-' \|\| variant[i]==',') {

	1394 variant[i]='_';

	1395 }

	1396 }

	1397 i++;

	1398 localeID++;

	1399 }

	1400 }

	1401

	1402 return i;

	1403 }

	1404

	1405 static int32_t

	1406 _getVariant(const char *localeID,

	1407 char prev,

	1408 char *variant, int32_t variantCapacity) {

	1409 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);

	1410 }

	1411

	1412 /**

	1413 * Delete ALL instances of a variant from the given list of one or

	1414 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".

	1415 * @param variants the source string of one or more variants,

	1416 * separated by '_'. This will be MODIFIED IN PLACE. Not zero

	1417 * terminated; if it is, trailing zero will NOT be maintained.

	1418 * @param variantsLen length of variants

	1419 * @param toDelete variant to delete, without separators, e.g. "EURO"

	1420 * or "PREEURO"; not zero terminated

	1421 * @param toDeleteLen length of toDelete

	1422 * @return number of characters deleted from variants

	1423 */

	1424 static int32_t

	1425 _deleteVariant(char* variants, int32_t variantsLen,

	1426 const char* toDelete, int32_t toDeleteLen)

	1427 {

	1428 int32_t delta = 0; /* number of chars deleted */

	1429 for (;;) {

	1430 UBool flag = FALSE;

	1431 if (variantsLen < toDeleteLen) {

	1432 return delta;

	1433 }

	1434 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&

	1435 (variantsLen == toDeleteLen \|\|

	1436 (flag=(variants[toDeleteLen] == '_'))))

	1437 {

	1438 int32_t d = toDeleteLen + (flag?1:0);

	1439 variantsLen -= d;

	1440 delta += d;

	1441 if (variantsLen > 0) {

	1442 uprv_memmove(variants, variants+d, variantsLen);

	1443 }

	1444 } else {

	1445 char* p = _strnchr(variants, variantsLen, '_');

	1446 if (p == NULL) {

	1447 return delta;

	1448 }

	1449 ++p;

	1450 variantsLen -= (int32_t)(p - variants);

	1451 variants = p;

	1452 }

	1453 }

	1454 }

	1455

	1456 /* Keyword enumeration */

	1457

	/* State of a keyword enumeration: the keyword list (each keyword followed
	 * by NUL, the list ended by an empty string) and the read position in it. */
	typedef struct UKeywordsContext {
	    char* keywords; /* start of the list; freed when the enumeration closes */
	    char* current;  /* next keyword to hand out */
	} UKeywordsContext;

	1462

	1463 static void U_CALLCONV

	1464 uloc_kw_closeKeywords(UEnumeration *enumerator) {

	1465 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);

	1466 uprv_free(enumerator->context);

	1467 uprv_free(enumerator);

	1468 }

	1469

	1470 static int32_t U_CALLCONV

	1471 uloc_kw_countKeywords(UEnumeration en, UErrorCode status) {

	1472 char kw = ((UKeywordsContext )en->context)->keywords;

	1473 int32_t result = 0;

	1474 while(*kw) {

	1475 result++;

	1476 kw += uprv_strlen(kw)+1;

	1477 }

	1478 return result;

	1479 }

	1480

	1481 static const char* U_CALLCONV

	1482 uloc_kw_nextKeyword(UEnumeration* en,

	1483 int32_t* resultLength,

	1484 UErrorCode* status) {

	1485 const char* result = ((UKeywordsContext *)en->context)->current;

	1486 int32_t len = 0;

	1487 if(*result) {

	1488 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);

	1489 ((UKeywordsContext *)en->context)->current += len+1;

	1490 } else {

	1491 result = NULL;

	1492 }

	1493 if (resultLength) {

	1494 *resultLength = len;

	1495 }

	1496 return result;

	1497 }

	1498

	1499 static void U_CALLCONV

	1500 uloc_kw_resetKeywords(UEnumeration* en,

	1501 UErrorCode* status) {

	1502 ((UKeywordsContext )en->context)->current = ((UKeywordsContext )en->contex t)->keywords;

	1503 }

	1504

	1505 static const UEnumeration gKeywordsEnum = {

	1506 NULL,

	1507 NULL,

	1508 uloc_kw_closeKeywords,

	1509 uloc_kw_countKeywords,

	1510 uenum_unextDefault,

	1511 uloc_kw_nextKeyword,

	1512 uloc_kw_resetKeywords

	1513 };

	1514

	1515 U_CAPI UEnumeration* U_EXPORT2

	1516 uloc_openKeywordList(const char keywordList, int32_t keywordListSize, UErrorCod e status)

	1517 {

	1518 UKeywordsContext *myContext = NULL;

	1519 UEnumeration *result = NULL;

	1520

	1521 if(U_FAILURE(*status)) {

	1522 return NULL;

	1523 }

	1524 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));

	1525 /* Null pointer test */

	1526 if (result == NULL) {

	1527 *status = U_MEMORY_ALLOCATION_ERROR;

	1528 return NULL;

	1529 }

	1530 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));

	1531 myContext = uprv_malloc(sizeof(UKeywordsContext));

	1532 if (myContext == NULL) {

	1533 *status = U_MEMORY_ALLOCATION_ERROR;

	1534 uprv_free(result);

	1535 return NULL;

	1536 }

	1537 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);

	1538 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);

	1539 myContext->keywords[keywordListSize] = 0;

	1540 myContext->current = myContext->keywords;

	1541 result->context = myContext;

	1542 return result;

	1543 }

	1544

	1545 U_CAPI UEnumeration* U_EXPORT2

	1546 uloc_openKeywords(const char* localeID,

	1547 UErrorCode* status)

	1548 {

	1549 int32_t i=0;

	1550 char keywords[256];

	1551 int32_t keywordsCapacity = 256;

	1552 char tempBuffer[ULOC_FULLNAME_CAPACITY];

	1553 const char* tmpLocaleID;

	1554

	1555 if(status==NULL \|\| U_FAILURE(*status)) {

	1556 return 0;

	1557 }

	1558

	1559 if (_hasBCP47Extension(localeID)) {

	1560 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), sta tus);

	1561 } else {

	1562 if (localeID==NULL) {

	1563 localeID=uloc_getDefault();

	1564 }

	1565 tmpLocaleID=localeID;

	1566 }

	1567

	1568 /* Skip the language */

	1569 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);

	1570 if(_isIDSeparator(*tmpLocaleID)) {

	1571 const char *scriptID;

	1572 /* Skip the script if available */

	1573 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);

	1574 if(scriptID != tmpLocaleID+1) {

	1575 /* Found optional script */

	1576 tmpLocaleID = scriptID;

	1577 }

	1578 /* Skip the Country */

	1579 if (_isIDSeparator(*tmpLocaleID)) {

	1580 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);

	1581 if(_isIDSeparator(*tmpLocaleID)) {

	1582 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);

	1583 }

	1584 }

	1585 }

	1586

	1587 /* keywords are located after '@' */

	1588 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {

	1589 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NUL L, 0, NULL, FALSE, status);

	1590 }

	1591

	1592 if(i) {

	1593 return uloc_openKeywordList(keywords, i, status);

	1594 } else {

	1595 return NULL;

	1596 }

	1597 }

	1598

	1599

	/* bit-flags for 'options' parameter of _canonicalize */
	#define _ULOC_STRIP_KEYWORDS 0x2
	#define _ULOC_CANONICALIZE   0x1

	/* nonzero if any bit of 'mask' is set in 'options' */
	#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

	/* the ID "i-default", deliberately stored without a terminating NUL;
	 * always use it together with I_DEFAULT_LENGTH */
	static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
	#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])

	1608

	1609 /**

	1610 * Canonicalize the given localeID, to level 1 or to level 2,

	1611 * depending on the options. To specify level 1, pass in options=0.

	1612 * To specify level 2, pass in options=_ULOC_CANONICALIZE.

	1613 *

	1614 * This is the code underlying uloc_getName and uloc_canonicalize.

	1615 */

	1616 static int32_t

	1617 _canonicalize(const char* localeID,

	1618 char* result,

	1619 int32_t resultCapacity,

	1620 uint32_t options,

	1621 UErrorCode* err) {

	1622 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;

	1623 char localeBuffer[ULOC_FULLNAME_CAPACITY];

	1624 char tempBuffer[ULOC_FULLNAME_CAPACITY];

	1625 const char* origLocaleID;

	1626 const char* tmpLocaleID;

	1627 const char* keywordAssign = NULL;

	1628 const char* separatorIndicator = NULL;

	1629 const char* addKeyword = NULL;

	1630 const char* addValue = NULL;

	1631 char* name;

	1632 char* variant = NULL; /* pointer into name, or NULL */

	1633

	1634 if (U_FAILURE(*err)) {

	1635 return 0;

	1636 }

	1637

	1638 if (_hasBCP47Extension(localeID)) {

	1639 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err );

	1640 } else {

	1641 if (localeID==NULL) {

	1642 localeID=uloc_getDefault();

	1643 }

	1644 tmpLocaleID=localeID;

	1645 }

	1646

	1647 origLocaleID=tmpLocaleID;

	1648

	1649 /* if we are doing a full canonicalization, then put results in

	1650 localeBuffer, if necessary; otherwise send them to result. */

	1651 if (/OPTION_SET(options, _ULOC_CANONICALIZE) &&/

	1652 (result == NULL \|\| resultCapacity < sizeof(localeBuffer))) {

	1653 name = localeBuffer;

	1654 nameCapacity = sizeof(localeBuffer);

	1655 } else {

	1656 name = result;

	1657 nameCapacity = resultCapacity;

	1658 }

	1659

	1660 /* get all pieces, one after another, and separate with '_' */

	1661 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);

	1662

	1663 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {

	1664 const char *d = uloc_getDefault();

	1665

	1666 len = (int32_t)uprv_strlen(d);

	1667

	1668 if (name != NULL) {

	1669 uprv_strncpy(name, d, len);

	1670 }

	1671 } else if(_isIDSeparator(*tmpLocaleID)) {

	1672 const char *scriptID;

	1673

	1674 ++fieldCount;

	1675 if(len<nameCapacity) {

	1676 name[len]='_';

	1677 }

	1678 ++len;

	1679

	1680 scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID);

	1681 if(scriptSize > 0) {

	1682 /* Found optional script */

	1683 tmpLocaleID = scriptID;

	1684 ++fieldCount;

	1685 len+=scriptSize;

	1686 if (_isIDSeparator(*tmpLocaleID)) {

	1687 /* If there is something else, then we add the _ */

	1688 if(len<nameCapacity) {

	1689 name[len]='_';

	1690 }

	1691 ++len;

	1692 }

	1693 }

	1694

	1695 if (_isIDSeparator(*tmpLocaleID)) {

	1696 const char *cntryID;

	1697 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, name Capacity-len, &cntryID);

	1698 if (cntrySize > 0) {

	1699 /* Found optional country */

	1700 tmpLocaleID = cntryID;

	1701 len+=cntrySize;

	1702 }

	1703 if(_isIDSeparator(*tmpLocaleID)) {

	1704 /* If there is something else, then we add the _ if we found co untry before.*/

	1705 if (cntrySize > 0) {

	1706 ++fieldCount;

	1707 if(len<nameCapacity) {

	1708 name[len]='_';

	1709 }

	1710 ++len;

	1711 }

	1712

	1713 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len);

	1714 if (variantSize > 0) {

	1715 variant = name+len;

	1716 len += variantSize;

	1717 tmpLocaleID += variantSize + 1; /* skip '_' and variant */

	1718 }

	1719 }

	1720 }

	1721 }

	1722

	1723 /* Copy POSIX-style charset specifier, if any [mr.utf8] */

	1724 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {

	1725 UBool done = FALSE;

	1726 do {

	1727 char c = *tmpLocaleID;

	1728 switch (c) {

	1729 case 0:

	1730 case '@':

	1731 done = TRUE;

	1732 break;

	1733 default:

	1734 if (len<nameCapacity) {

	1735 name[len] = c;

	1736 }

	1737 ++len;

	1738 ++tmpLocaleID;

	1739 break;

	1740 }

	1741 } while (!done);

	1742 }

	1743

	1744 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'

	1745 After this, tmpLocaleID either points to '@' or is NULL */

	1746 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {

	1747 keywordAssign = uprv_strchr(tmpLocaleID, '=');

	1748 separatorIndicator = uprv_strchr(tmpLocaleID, ';');

	1749 }

	1750

	1751 /* Copy POSIX-style variant, if any [mr@FOO] */

	1752 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&

	1753 tmpLocaleID != NULL && keywordAssign == NULL) {

	1754 for (;;) {

	1755 char c = *tmpLocaleID;

	1756 if (c == 0) {

	1757 break;

	1758 }

	1759 if (len<nameCapacity) {

	1760 name[len] = c;

	1761 }

	1762 ++len;

	1763 ++tmpLocaleID;

	1764 }

	1765 }

	1766

	1767 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {

	1768 /* Handle @FOO variant if @ is present and not followed by = */

	1769 if (tmpLocaleID!=NULL && keywordAssign==NULL) {

	1770 int32_t posixVariantSize;

	1771 /* Add missing '_' if needed */

	1772 if (fieldCount < 2 \|\| (fieldCount < 3 && scriptSize > 0)) {

	1773 do {

	1774 if(len<nameCapacity) {

	1775 name[len]='_';

	1776 }

	1777 ++len;

	1778 ++fieldCount;

	1779 } while(fieldCount<2);

	1780 }

	1781 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameC apacity-len,

	1782 (UBool)(variantSize > 0));

	1783 if (posixVariantSize > 0) {

	1784 if (variant == NULL) {

	1785 variant = name+len;

	1786 }

	1787 len += posixVariantSize;

	1788 variantSize += posixVariantSize;

	1789 }

	1790 }

	1791

	1792 /* Handle generic variants first */

	1793 if (variant) {

	1794 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j ++) {

	1795 const char* variantToCompare = VARIANT_MAP[j].variant;

	1796 int32_t n = (int32_t)uprv_strlen(variantToCompare);

	1797 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSiz e, (nameCapacity-len)), variantToCompare, n);

	1798 len -= variantLen;

	1799 if (variantLen > 0) {

	1800 if (name[len-1] == '_') { /* delete trailing '_' */

	1801 --len;

	1802 }

	1803 addKeyword = VARIANT_MAP[j].keyword;

	1804 addValue = VARIANT_MAP[j].value;

	1805 break;

	1806 }

	1807 }

	1808 if (name[len-1] == '_') { /* delete trailing '_' */

	1809 --len;

	1810 }

	1811 }

	1812

	1813 /* Look up the ID in the canonicalization map */

	1814 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0 ])); j++) {

	1815 const char* id = CANONICALIZE_MAP[j].id;

	1816 int32_t n = (int32_t)uprv_strlen(id);

	1817 if (len == n && uprv_strncmp(name, id, n) == 0) {

	1818 if (n == 0 && tmpLocaleID != NULL) {

	1819 break; /* Don't remap "" if keywords present */

	1820 }

	1821 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonic alID);

	1822 if (CANONICALIZE_MAP[j].keyword) {

	1823 addKeyword = CANONICALIZE_MAP[j].keyword;

	1824 addValue = CANONICALIZE_MAP[j].value;

	1825 }

	1826 break;

	1827 }

	1828 }

	1829 }

	1830

	1831 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {

	1832 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&

	1833 (!separatorIndicator \|\| separatorIndicator > keywordAssign)) {

	1834 if(len<nameCapacity) {

	1835 name[len]='@';

	1836 }

	1837 ++len;

	1838 ++fieldCount;

	1839 len += _getKeywords(tmpLocaleID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,

	1840 addKeyword, addValue, err);

	1841 } else if (addKeyword != NULL) {

	1842 U_ASSERT(addValue != NULL);

	1843 /* inelegant but works -- later make _getKeywords do this? */

	1844 len += _copyCount(name+len, nameCapacity-len, "@");

	1845 len += _copyCount(name+len, nameCapacity-len, addKeyword);

	1846 len += _copyCount(name+len, nameCapacity-len, "=");

	1847 len += _copyCount(name+len, nameCapacity-len, addValue);

	1848 }

	1849 }

	1850

	1851 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {

	1852 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapaci ty : len);

	1853 }

	1854

	1855 return u_terminateChars(result, resultCapacity, len, err);

	1856 }

	1857

	1858 /* ### ID parsing API **************************************************/

	1859

	1860 U_CAPI int32_t U_EXPORT2

	1861 uloc_getParent(const char* localeID,

	1862 char* parent,

	1863 int32_t parentCapacity,

	1864 UErrorCode* err)

	1865 {

	1866 const char *lastUnderscore;

	1867 int32_t i;

	1868

	1869 if (U_FAILURE(*err))

	1870 return 0;

	1871

	1872 if (localeID == NULL)

	1873 localeID = uloc_getDefault();

	1874

	1875 lastUnderscore=uprv_strrchr(localeID, '_');

	1876 if(lastUnderscore!=NULL) {

	1877 i=(int32_t)(lastUnderscore-localeID);

	1878 } else {

	1879 i=0;

	1880 }

	1881

	1882 if(i>0 && parent != localeID) {

	1883 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));

	1884 }

	1885 return u_terminateChars(parent, parentCapacity, i, err);

	1886 }

	1887

	1888 U_CAPI int32_t U_EXPORT2

	1889 uloc_getLanguage(const char* localeID,

	1890 char* language,

	1891 int32_t languageCapacity,

	1892 UErrorCode* err)

	1893 {

	1894 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. C WB/

	1895 int32_t i=0;

	1896

	1897 if (err==NULL \|\| U_FAILURE(*err)) {

	1898 return 0;

	1899 }

	1900

	1901 if(localeID==NULL) {

	1902 localeID=uloc_getDefault();

	1903 }

	1904

	1905 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);

	1906 return u_terminateChars(language, languageCapacity, i, err);

	1907 }

	1908

	1909 U_CAPI int32_t U_EXPORT2

	1910 uloc_getScript(const char* localeID,

	1911 char* script,

	1912 int32_t scriptCapacity,

	1913 UErrorCode* err)

	1914 {

	1915 int32_t i=0;

	1916

	1917 if(err==NULL \|\| U_FAILURE(*err)) {

	1918 return 0;

	1919 }

	1920

	1921 if(localeID==NULL) {

	1922 localeID=uloc_getDefault();

	1923 }

	1924

	1925 /* skip the language */

	1926 ulocimp_getLanguage(localeID, NULL, 0, &localeID);

	1927 if(_isIDSeparator(*localeID)) {

	1928 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);

	1929 }

	1930 return u_terminateChars(script, scriptCapacity, i, err);

	1931 }

	1932

	1933 U_CAPI int32_t U_EXPORT2

	1934 uloc_getCountry(const char* localeID,

	1935 char* country,

	1936 int32_t countryCapacity,

	1937 UErrorCode* err)

	1938 {

	1939 int32_t i=0;

	1940

	1941 if(err==NULL \|\| U_FAILURE(*err)) {

	1942 return 0;

	1943 }

	1944

	1945 if(localeID==NULL) {

	1946 localeID=uloc_getDefault();

	1947 }

	1948

	1949 /* Skip the language */

	1950 ulocimp_getLanguage(localeID, NULL, 0, &localeID);

	1951 if(_isIDSeparator(*localeID)) {

	1952 const char *scriptID;

	1953 /* Skip the script if available */

	1954 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);

	1955 if(scriptID != localeID+1) {

	1956 /* Found optional script */

	1957 localeID = scriptID;

	1958 }

	1959 if(_isIDSeparator(*localeID)) {

	1960 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);

	1961 }

	1962 }

	1963 return u_terminateChars(country, countryCapacity, i, err);

	1964 }

	1965

	1966 U_CAPI int32_t U_EXPORT2

	1967 uloc_getVariant(const char* localeID,

	1968 char* variant,

	1969 int32_t variantCapacity,

	1970 UErrorCode* err)

	1971 {

	1972 char tempBuffer[ULOC_FULLNAME_CAPACITY];

	1973 const char* tmpLocaleID;

	1974 int32_t i=0;

	1975

	1976 if(err==NULL \|\| U_FAILURE(*err)) {

	1977 return 0;

	1978 }

	1979

	1980 if (_hasBCP47Extension(localeID)) {

	1981 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err );

	1982 } else {

	1983 if (localeID==NULL) {

	1984 localeID=uloc_getDefault();

	1985 }

	1986 tmpLocaleID=localeID;

	1987 }

	1988

	1989 /* Skip the language */

	1990 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);

	1991 if(_isIDSeparator(*tmpLocaleID)) {

	1992 const char *scriptID;

	1993 /* Skip the script if available */

	1994 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);

	1995 if(scriptID != tmpLocaleID+1) {

	1996 /* Found optional script */

	1997 tmpLocaleID = scriptID;

	1998 }

	1999 /* Skip the Country */

	2000 if (_isIDSeparator(*tmpLocaleID)) {

	2001 const char *cntryID;

	2002 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);

	2003 if (cntryID != tmpLocaleID+1) {

	2004 /* Found optional country */

	2005 tmpLocaleID = cntryID;

	2006 }

	2007 if(_isIDSeparator(*tmpLocaleID)) {

	2008 /* If there was no country ID, skip a possible extra IDSeparator */

	2009 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {

	2010 tmpLocaleID++;

	2011 }

	2012 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapac ity);

	2013 }

	2014 }

	2015 }

	2016

	2017 /* removed by weiv. We don't want to handle POSIX variants anymore. Use cano nicalization function */

	2018 /* if we do not have a variant tag yet then try a POSIX variant after '@' */

	2019 /*

	2020 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {

	2021 i=_getVariant(localeID+1, '@', variant, variantCapacity);

	2022 }

	2023 */

	2024 return u_terminateChars(variant, variantCapacity, i, err);

	2025 }

	2026

	2027 U_CAPI int32_t U_EXPORT2

	2028 uloc_getName(const char* localeID,

	2029 char* name,

	2030 int32_t nameCapacity,

	2031 UErrorCode* err)

	2032 {

	2033 return _canonicalize(localeID, name, nameCapacity, 0, err);

	2034 }

	2035

	2036 U_CAPI int32_t U_EXPORT2

	2037 uloc_getBaseName(const char* localeID,

	2038 char* name,

	2039 int32_t nameCapacity,

	2040 UErrorCode* err)

	2041 {

	2042 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err );

	2043 }

	2044

	2045 U_CAPI int32_t U_EXPORT2

	2046 uloc_canonicalize(const char* localeID,

	2047 char* name,

	2048 int32_t nameCapacity,

	2049 UErrorCode* err)

	2050 {

	2051 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);

	2052 }

	2053

	2054 U_CAPI const char* U_EXPORT2

	2055 uloc_getISO3Language(const char* localeID)

	2056 {

	2057 int16_t offset;

	2058 char lang[ULOC_LANG_CAPACITY];

	2059 UErrorCode err = U_ZERO_ERROR;

	2060

	2061 if (localeID == NULL)

	2062 {

	2063 localeID = uloc_getDefault();

	2064 }

	2065 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);

	2066 if (U_FAILURE(err))

	2067 return "";

	2068 offset = _findIndex(LANGUAGES, lang);

	2069 if (offset < 0)

	2070 return "";

	2071 return LANGUAGES_3[offset];

	2072 }

	2073

	2074 U_CAPI const char* U_EXPORT2

	2075 uloc_getISO3Country(const char* localeID)

	2076 {

	2077 int16_t offset;

	2078 char cntry[ULOC_LANG_CAPACITY];

	2079 UErrorCode err = U_ZERO_ERROR;

	2080

	2081 if (localeID == NULL)

	2082 {

	2083 localeID = uloc_getDefault();

	2084 }

	2085 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);

	2086 if (U_FAILURE(err))

	2087 return "";

	2088 offset = _findIndex(COUNTRIES, cntry);

	2089 if (offset < 0)

	2090 return "";

	2091

	2092 return COUNTRIES_3[offset];

	2093 }

	2094

	2095 U_CAPI uint32_t U_EXPORT2

	2096 uloc_getLCID(const char* localeID)

	2097 {

	2098 UErrorCode status = U_ZERO_ERROR;

	2099 char langID[ULOC_FULLNAME_CAPACITY];

	2100

	2101 uloc_getLanguage(localeID, langID, sizeof(langID), &status);

	2102 if (U_FAILURE(status)) {

	2103 return 0;

	2104 }

	2105

	2106 return uprv_convertToLCID(langID, localeID, &status);

	2107 }

	2108

	2109 U_CAPI int32_t U_EXPORT2

	2110 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,

	2111 UErrorCode *status)

	2112 {

	2113 int32_t length;

	2114 const char *posix = uprv_convertToPosix(hostid, status);

	2115 if (U_FAILURE(*status) \|\| posix == NULL) {

	2116 return 0;

	2117 }

	2118 length = (int32_t)uprv_strlen(posix);

	2119 if (length+1 > localeCapacity) {

	2120 *status = U_BUFFER_OVERFLOW_ERROR;

	2121 }

	2122 else {

	2123 uprv_strcpy(locale, posix);

	2124 }

	2125 return length;

	2126 }

	2127

	2128 /* ### Default locale **************************************************/

	2129

	2130 U_CAPI const char* U_EXPORT2

	2131 uloc_getDefault()

	2132 {

	2133 return locale_get_default();

	2134 }

	2135

	2136 U_CAPI void U_EXPORT2

	2137 uloc_setDefault(const char* newDefaultLocale,

	2138 UErrorCode* err)

	2139 {

	2140 if (U_FAILURE(*err))

	2141 return;

	2142 /* the error code isn't currently used for anything by this function*/

	2143

	2144 /* propagate change to C++ */

	2145 locale_set_default(newDefaultLocale);

	2146 }

	2147

	2148 /**

	2149 * Returns a list of all language codes defined in ISO 639. This is a pointer

	2150 * to an array of pointers to arrays of char. All of these pointers are owned

	2151 * by ICU-- do not delete them, and do not write through them. The array is

	2152 * terminated with a null pointer.

	2153 */

	2154 U_CAPI const char* const* U_EXPORT2

	2155 uloc_getISOLanguages()

	2156 {

	2157 return LANGUAGES;

	2158 }

	2159

	2160 /**

	2161 * Returns a list of all 2-letter country codes defined in ISO 639. This is a

	2162 * pointer to an array of pointers to arrays of char. All of these pointers are

	2163 * owned by ICU-- do not delete them, and do not write through them. The array is

	2164 * terminated with a null pointer.

	2165 */

	2166 U_CAPI const char* const* U_EXPORT2

	2167 uloc_getISOCountries()

	2168 {

	2169 return COUNTRIES;

	2170 }

	2171

	2172

	/* this function to be moved into cstring.c later */

	/* First byte of the decimal separator that sprintf() produces; 0 until probed. */
	static char gDecimal = 0;

	/**
	 * strtod() wrapper that accepts '.' as the decimal point even when the C
	 * runtime formats numbers with a different separator.
	 *
	 * @param start Text to parse.
	 * @param end   If not NULL, receives a pointer into start just past the
	 *              parsed number.
	 * @return the parsed value.
	 */
	static /* U_CAPI */
	double
	/* U_EXPORT2 */
	_uloc_strtod(const char *start, char **end) {
	    char *decimal;
	    char *myEnd;
	    char buf[30];
	    double rv;
	    if (!gDecimal) {
	        /* "+1", the separator, "0" and the NUL.  The buffer is sized
	           generously because the separator is not necessarily one byte. */
	        char rep[32];
	        /* For machines that decide to change the decimal on you,
	        and try to be too smart with localization.
	        This normally should be just a '.'. */
	        sprintf(rep, "%+1.1f", 1.0);
	        gDecimal = rep[2];
	    }

	    if(gDecimal == '.') {
	        return uprv_strtod(start, end); /* fall through to OS */
	    } else {
	        /* work on a bounded private copy so the '.' can be replaced */
	        uprv_strncpy(buf, start, 29);
	        buf[29]=0;
	        decimal = uprv_strchr(buf, '.');
	        if(decimal) {
	            *decimal = gDecimal;
	        } else {
	            return uprv_strtod(start, end); /* no decimal point */
	        }
	        rv = uprv_strtod(buf, &myEnd);
	        if(end) {
	            /* translate the end offset in buf back into start */
	            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
	        }
	        return rv;
	    }
	}

	2211

	2212 typedef struct {

	2213 float q;

	2214 int32_t dummy; /* to avoid uninitialized memory copy from qsort */

	2215 char *locale;

	2216 } _acceptLangItem;

	2217

	2218 static int32_t U_CALLCONV

	2219 uloc_acceptLanguageCompare(const void context, const void a, const void *b)

	2220 {

	2221 const _acceptLangItem aa = (const _acceptLangItem)a;

	2222 const _acceptLangItem bb = (const _acceptLangItem)b;

	2223

	2224 int32_t rc = 0;

	2225 if(bb->q < aa->q) {

	2226 rc = -1; /* A > B */

	2227 } else if(bb->q > aa->q) {

	2228 rc = 1; /* A < B */

	2229 } else {

	2230 rc = 0; /* A = B */

	2231 }

	2232

	2233 if(rc==0) {

	2234 rc = uprv_stricmp(aa->locale, bb->locale);

	2235 }

	2236

	2237 #if defined(ULOC_DEBUG)

	2238 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",

	2239 aa->locale, aa->q,

	2240 bb->locale, bb->q,

	2241 rc);*/

	2242 #endif

	2243

	2244 return rc;

	2245 }

	2246

	2247 /*

	2248 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84 , iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it; q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53

	2249 */

	2250

	2251 U_CAPI int32_t U_EXPORT2

	2252 uloc_acceptLanguageFromHTTP(char result, int32_t resultAvailable, UAcceptResult outResult,

	2253 const char *httpAcceptLanguage,

	2254 UEnumeration* availableLocales,

	2255 UErrorCode *status)

	2256 {

	2257 _acceptLangItem *j;

	2258 _acceptLangItem smallBuffer[30];

	2259 char **strs;

	2260 char tmp[ULOC_FULLNAME_CAPACITY +1];

	2261 int32_t n = 0;

	2262 const char *itemEnd;

	2263 const char *paramEnd;

	2264 const char *s;

	2265 const char *t;

	2266 int32_t res;

	2267 int32_t i;

	2268 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);

	2269 int32_t jSize;

	2270 char tempstr; / Use for null pointer check */

	2271

	2272 j = smallBuffer;

	2273 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);

	2274 if(U_FAILURE(*status)) {

	2275 return -1;

	2276 }

	2277

	2278 for(s=httpAcceptLanguage;s&&*s;) {

	2279 while(isspace(s)) / eat space at the beginning */

	2280 s++;

	2281 itemEnd=uprv_strchr(s,',');

	2282 paramEnd=uprv_strchr(s,';');

	2283 if(!itemEnd) {

	2284 itemEnd = httpAcceptLanguage+l; /* end of string */

	2285 }

	2286 if(paramEnd && paramEnd<itemEnd) {

	2287 /* semicolon (;) is closer than end (,) */

	2288 t = paramEnd+1;

	2289 if(*t=='q') {

	2290 t++;

	2291 }

	2292 while(isspace(*t)) {

	2293 t++;

	2294 }

	2295 if(*t=='=') {

	2296 t++;

	2297 }

	2298 while(isspace(*t)) {

	2299 t++;

	2300 }

	2301 j[n].q = (float)_uloc_strtod(t,NULL);

	2302 } else {

	2303 /* no semicolon - it's 1.0 */

	2304 j[n].q = 1.0f;

	2305 paramEnd = itemEnd;

	2306 }

	2307 j[n].dummy=0;

	2308 /* eat spaces prior to semi */

	2309 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)

	2310 ;

	2311 /* Check for null pointer from uprv_strndup */

	2312 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));

	2313 if (tempstr == NULL) {

	2314 *status = U_MEMORY_ALLOCATION_ERROR;

	2315 return -1;

	2316 }

	2317 j[n].locale = tempstr;

	2318 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);

	2319 if(strcmp(j[n].locale,tmp)) {

	2320 uprv_free(j[n].locale);

	2321 j[n].locale=uprv_strdup(tmp);

	2322 }

	2323 #if defined(ULOC_DEBUG)

	2324 /fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);/

	2325 #endif

	2326 n++;

	2327 s = itemEnd;

	2328 while(s==',') { / eat duplicate commas */

	2329 s++;

	2330 }

	2331 if(n>=jSize) {

	2332 if(j==smallBuffer) { /* overflowed the small buffer. */

	2333 j = uprv_malloc(sizeof(j[0])(jSize2));

	2334 if(j!=NULL) {

	2335 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);

	2336 }

	2337 #if defined(ULOC_DEBUG)

	2338 fprintf(stderr,"malloced at size %d\n", jSize);

	2339 #endif

	2340 } else {

	2341 j = uprv_realloc(j, sizeof(j[0])jSize2);

	2342 #if defined(ULOC_DEBUG)

	2343 fprintf(stderr,"re-alloced at size %d\n", jSize);

	2344 #endif

	2345 }

	2346 jSize *= 2;

	2347 if(j==NULL) {

	2348 *status = U_MEMORY_ALLOCATION_ERROR;

	2349 return -1;

	2350 }

	2351 }

	2352 }

	2353 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, s tatus);

	2354 if(U_FAILURE(*status)) {

	2355 if(j != smallBuffer) {

	2356 #if defined(ULOC_DEBUG)

	2357 fprintf(stderr,"freeing j %p\n", j);

	2358 #endif

	2359 uprv_free(j);

	2360 }

	2361 return -1;

	2362 }

	2363 strs = uprv_malloc((size_t)(sizeof(strs[0])*n));

	2364 /* Check for null pointer */

	2365 if (strs == NULL) {

	2366 uprv_free(j); /* Free to avoid memory leak */

	2367 *status = U_MEMORY_ALLOCATION_ERROR;

	2368 return -1;

	2369 }

	2370 for(i=0;i<n;i++) {

	2371 #if defined(ULOC_DEBUG)

	2372 /fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);/

	2373 #endif

	2374 strs[i]=j[i].locale;

	2375 }

	2376 res = uloc_acceptLanguage(result, resultAvailable, outResult,

	2377 (const char**)strs, n, availableLocales, status);

	2378 for(i=0;i<n;i++) {

	2379 uprv_free(strs[i]);

	2380 }

	2381 uprv_free(strs);

	2382 if(j != smallBuffer) {

	2383 #if defined(ULOC_DEBUG)

	2384 fprintf(stderr,"freeing j %p\n", j);

	2385 #endif

	2386 uprv_free(j);

	2387 }

	2388 return res;

	2389 }

	2390

	2391

	2392 U_CAPI int32_t U_EXPORT2

	2393 uloc_acceptLanguage(char *result, int32_t resultAvailable,

	2394 UAcceptResult outResult, const char *acceptList,

	2395 int32_t acceptListCount,

	2396 UEnumeration* availableLocales,

	2397 UErrorCode *status)

	2398 {

	2399 int32_t i,j;

	2400 int32_t len;

	2401 int32_t maxLen=0;

	2402 char tmp[ULOC_FULLNAME_CAPACITY+1];

	2403 const char *l;

	2404 char **fallbackList;

	2405 if(U_FAILURE(*status)) {

	2406 return -1;

	2407 }

	2408 fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount) );

	2409 if(fallbackList==NULL) {

	2410 *status = U_MEMORY_ALLOCATION_ERROR;

	2411 return -1;

	2412 }

	2413 for(i=0;i<acceptListCount;i++) {

	2414 #if defined(ULOC_DEBUG)

	2415 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);

	2416 #endif

	2417 while((l=uenum_next(availableLocales, NULL, status))) {

	2418 #if defined(ULOC_DEBUG)

	2419 fprintf(stderr," %s\n", l);

	2420 #endif

	2421 len = (int32_t)uprv_strlen(l);

	2422 if(!uprv_strcmp(acceptList[i], l)) {

	2423 if(outResult) {

	2424 *outResult = ULOC_ACCEPT_VALID;

	2425 }

	2426 #if defined(ULOC_DEBUG)

	2427 fprintf(stderr, "MATCH! %s\n", l);

	2428 #endif

	2429 if(len>0) {

	2430 uprv_strncpy(result, l, uprv_min(len, resultAvailable));

	2431 }

	2432 for(j=0;j<i;j++) {

	2433 uprv_free(fallbackList[j]);

	2434 }

	2435 uprv_free(fallbackList);

	2436 return u_terminateChars(result, resultAvailable, len, status);

	2437 }

	2438 if(len>maxLen) {

	2439 maxLen = len;

	2440 }

	2441 }

	2442 uenum_reset(availableLocales, status);

	2443 /* save off parent info */

	2444 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status )!=0) {

	2445 fallbackList[i] = uprv_strdup(tmp);

	2446 } else {

	2447 fallbackList[i]=0;

	2448 }

	2449 }

	2450

	2451 for(maxLen--;maxLen>0;maxLen--) {

	2452 for(i=0;i<acceptListCount;i++) {

	2453 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen )) {

	2454 #if defined(ULOC_DEBUG)

	2455 fprintf(stderr,"Try: [%s]", fallbackList[i]);

	2456 #endif

	2457 while((l=uenum_next(availableLocales, NULL, status))) {

	2458 #if defined(ULOC_DEBUG)

	2459 fprintf(stderr," %s\n", l);

	2460 #endif

	2461 len = (int32_t)uprv_strlen(l);

	2462 if(!uprv_strcmp(fallbackList[i], l)) {

	2463 if(outResult) {

	2464 *outResult = ULOC_ACCEPT_FALLBACK;

	2465 }

	2466 #if defined(ULOC_DEBUG)

	2467 fprintf(stderr, "fallback MATCH! %s\n", l);

	2468 #endif

	2469 if(len>0) {

	2470 uprv_strncpy(result, l, uprv_min(len, resultAvailabl e));

	2471 }

	2472 for(j=0;j<acceptListCount;j++) {

	2473 uprv_free(fallbackList[j]);

	2474 }

	2475 uprv_free(fallbackList);

	2476 return u_terminateChars(result, resultAvailable, len, st atus);

	2477 }

	2478 }

	2479 uenum_reset(availableLocales, status);

	2480

	2481 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0 ]), status)!=0) {

	2482 uprv_free(fallbackList[i]);

	2483 fallbackList[i] = uprv_strdup(tmp);

	2484 } else {

	2485 uprv_free(fallbackList[i]);

	2486 fallbackList[i]=0;

	2487 }

	2488 }

	2489 }

	2490 if(outResult) {

	2491 *outResult = ULOC_ACCEPT_FAILED;

	2492 }

	2493 }

	2494 for(i=0;i<acceptListCount;i++) {

	2495 uprv_free(fallbackList[i]);

	2496 }

	2497 uprv_free(fallbackList);

	2498 return -1;

	2499 }

	2500

	2501 /eof/

OLD	NEW

« no previous file with comments | « icu46/source/common/ulist.c ('k') | icu46/source/common/uloc_tag.c » ('j') | no next file with comments »