Chromium Code Reviews| Index: net/android/java/src/org/chromium/net/NetStringUtil.java |
| diff --git a/net/android/java/src/org/chromium/net/NetStringUtil.java b/net/android/java/src/org/chromium/net/NetStringUtil.java |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..b9824bf11617021d26966146901072140faae995 |
| --- /dev/null |
| +++ b/net/android/java/src/org/chromium/net/NetStringUtil.java |
| @@ -0,0 +1,89 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +package org.chromium.net; |
| + |
| +import org.chromium.base.CalledByNative; |
| +import org.chromium.base.JNINamespace; |
| + |
| +import java.nio.ByteBuffer; |
| +import java.nio.charset.Charset; |
| +import java.nio.charset.CharsetDecoder; |
| +import java.nio.charset.CodingErrorAction; |
| +import java.text.Normalizer; |
| + |
| +/** |
| + * Utility functions for converting strings between formats when not built with |
| + * ICU. |
| + */ |
| +@JNINamespace("net::android") |
| +public class NetStringUtil { |
| + /** |
| + * Attempts to convert text in a given character set to a Unicode string. |
| + * Any exception raised while looking up the character set or decoding |
| + * the bytes is caught, and null is returned instead. |
| + * @param text Encoded bytes to convert. |
| + * @param charset_name Name of the character set the text is encoded in. |
| + * @return Unicode string on success, null on failure. |
| + */ |
| + @CalledByNative |
| + private static String convertToUnicode( |
| + ByteBuffer text, |

mef
2014/05/06 15:40:58
I presume that |text| is array of char, but it is
mmenke
2014/05/06 18:57:21
Done.
|
| + String charset_name) { |
| + try { |
| + Charset charset = Charset.forName(charset_name); |

mef
2014/05/06 15:40:58
what happens if charset_name is invalid?
mmenke
2014/05/06 18:57:21
It throws an IllegalCharsetNameException or Unsupp
|
| + CharsetDecoder decoder = charset.newDecoder(); |
| + // On invalid characters, decode() throws; the catch below turns that into a null return. |
| + return decoder.decode(text).toString(); |
| + } catch (Exception e) { |
| + return null; |
| + } |
| + } |
| + |
| + /** |
| + * Attempts to convert text in a given character set to a Unicode string |
| + * via convertToUnicode(), then normalizes the result to form NFC. Returns |
| + * null, without normalizing, if the conversion fails. |
| + * @param text Encoded bytes to convert. |
| + * @param charset_name Name of the character set the text is encoded in. |
| + * @return Normalized Unicode string on success, null on failure. |
| + */ |
| + @CalledByNative |
| + private static String convertToUnicodeAndNormalize( |
| + ByteBuffer text, |
| + String charset_name) { |
| + String unicodeString = convertToUnicode(text, charset_name); |
| + if (unicodeString == null) |
| + return unicodeString; |
| + return Normalizer.normalize(unicodeString, Normalizer.Form.NFC); |
| + } |
| + |
| + /** |
| + * Converts text in a given character set to a Unicode string. Malformed |
| + * input and unmappable characters are replaced with U+FFFD instead of |
| + * causing a failure. Returns null if the character set is not recognized, |
| + * or if any other exception is thrown during conversion. |
| + * @param text Encoded bytes to convert. |
| + * @param charset_name Name of the character set the text is encoded in. |
| + * @return Unicode string on success, null on failure. |
| + */ |
| + @CalledByNative |
| + private static String convertToUnicodeWithSubstitutions( |
| + ByteBuffer text, |
| + String charset_name) { |
| + try { |
| + Charset charset = Charset.forName(charset_name); |

mef
2014/05/06 15:40:58
check charset != null?
mmenke
2014/05/06 18:57:21
We get an IllegalArgumentException in that case, a
|
| + |
| + // TODO(mmenke): Investigate if Charset.decode() can be used |
| + // instead. The question is whether it uses the proper replace |
| + // character. JDK CharsetDecoder docs say U+FFFD is the default, |
| + // but Charset.decode() docs say it uses the "charset's default |
| + // replacement byte array". |
| + CharsetDecoder decoder = charset.newDecoder(); |
| + decoder.onMalformedInput(CodingErrorAction.REPLACE); |
| + decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); |
| + decoder.replaceWith("\uFFFD"); |
| + return decoder.decode(text).toString(); |
| + } catch (Exception e) { |
| + return null; |
| + } |
| + } |
| +} |