app/l10n_util.cc - Issue 5643002: Add utility function to determine if a locale is valid syntax; this will...

Unified Diff: app/l10n_util.cc

Issue 5643002: Add utility function to determine if a locale is valid syntax; this will... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: app/l10n_util.cc

===================================================================

--- app/l10n_util.cc (revision 68844)

+++ app/l10n_util.cc (working copy)

@@ -29,6 +29,7 @@

#include "build/build_config.h"

#include "gfx/canvas.h"

#include "unicode/rbbi.h"

+#include "unicode/uloc.h"

#if defined(OS_MACOSX)

#include "app/l10n_util_mac.h"

@@ -478,6 +479,102 @@

return display_name;

}

// Returns a copy of |locale| in which every '-' has been replaced by '_'
// (e.g. "en-US" becomes "en_US"). All other characters are left untouched,
// and an empty input yields an empty result.
std::string NormalizeLocale(const std::string& locale) {
  std::string normalized_locale(locale);
  std::replace(normalized_locale.begin(), normalized_locale.end(), '-', '_');
  return normalized_locale;
}

+void GetParentLocales(const std::string& current_locale,

+ std::vector<std::string>* parent_locales) {

+ std::string locale(NormalizeLocale(current_locale));

+ const int kNameCapacity = 256;

+ char parent[kNameCapacity];

+ base::strlcpy(parent, locale.c_str(), kNameCapacity);

+ parent_locales->push_back(parent);

+ UErrorCode err = U_ZERO_ERROR;

+ while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {

+ if (U_FAILURE(err))

+ break;

+ parent_locales->push_back(parent);

+ }

+bool IsValidLocaleSyntax(const std::string& locale) {

+ // This implements a simple approximation of BCP 47; it will accept all

+ // valid strings and reject some (but not all) invalid ones.

+ // http://www.unicode.org/reports/tr35/

+ // #Unicode_Language_and_Locale_Identifiers

Nebojša Ćirić 2010/12/10 21:42:35 You could move some (url to TR35) to .h file and r

+ // Check that the length is plausible.

+ if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)

+ return false;

+ // Strip off the part after an '@' sign, which might contain keywords,

+ // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.

+ // We don't validate that part much, just check that there's at least one

+ // equals sign in a plausible place.

+ std::string prefix = locale;

+ if (locale.find("@") != std::string::npos) {

+ size_t split_point = locale.find("@");

Nebojša Ćirić 2010/12/10 21:42:35 You could do: size_t split_point = locale.find("@"

+ std::string keywords = locale.substr(split_point + 1);

+ prefix = locale.substr(0, split_point);

+ size_t equals_loc = keywords.find("=");

+ if (equals_loc == std::string::npos ||

+ equals_loc < 1 || equals_loc > keywords.size() - 2)

+ return false;

+ }

+ // Check that all characters before the at-sign are alphanumeric, hyphen,

+ // or underscore.

Nebojša Ćirić 2010/12/10 21:42:35 You could call NormalizeLocale before processing t

+ for (size_t i = 0; i < prefix.size(); i++) {

+ char ch = prefix[i];

+ if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '-' && ch != '_')

+ return false;

+ }

+ // Check that the initial token (before the first hyphen/underscore)

+ // is 1 - 3 alphabetical characters (a language tag).

+ for (size_t i = 0; i < prefix.size(); i++) {

+ char ch = prefix[i];

+ if (ch == '-' || ch == '_') {

+ if (i < 1 || i > 3)

+ return false;

+ break;

+ }

+ if (!IsAsciiAlpha(ch))

+ return false;

+ }

+ // Check that the all tokens after the initial token are 1 - 8 characters.

+ // (Tokenize/StringTokenizer don't work here, they collapse multiple

+ // delimiters into one.)

+ int token_len = 0;

+ int token_index = 0;

+ for (size_t i = 0; i < prefix.size(); i++) {

+ char ch = prefix[i];

+ if (ch == '-' || ch == '_') {

+ if (token_index > 0 && (token_len < 1 || token_len > 8)) {

+ return false;

+ }

+ token_index++;

+ token_len = 0;

+ } else {

+ token_len++;

+ }

+ if (token_index == 0 && (token_len < 1 || token_len > 3)) {

+ return false;

+ } else if (token_len < 1 || token_len > 8) {

+ return false;

+ }

+ return true;

// Returns the string for |message_id| as a std::wstring: the value obtained
// from GetStringUTF16(message_id) is passed through UTF16ToWide().
std::wstring GetString(int message_id) {

return UTF16ToWide(GetStringUTF16(message_id));

}

« app/l10n_util.h ('K') | « app/l10n_util.h ('k') | app/l10n_util_unittest.cc » ('j') | app/l10n_util_unittest.cc » ('J')