utilities.cc - Issue 6740005: Handle UCS-2 data coding scheme for SMS messages.

Unified Diff: utilities.cc

Issue 6740005: Handle UCS-2 data coding scheme for SMS messages. (Closed) Base URL: ssh://gitrw.chromium.org:9222/cromo.git@master

Patch Set: Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: utilities.cc

diff --git a/utilities.cc b/utilities.cc

index 41f6c78c22c42cade00a1538b9880da313eb1ee9..e491789cf15ac5688dd0519dec57f8cd3becd2b0 100644

--- a/utilities.cc

+++ b/utilities.cc

@@ -1,4 +1,4 @@

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.

@@ -273,6 +273,62 @@ std::vector<uint8_t> Utf8StringToGsm7(const std::string& input) {

return octets;

}

+std::string Ucs2ToUtf8String(const uint8_t *ucs2) {

+ std::string str;

+ uint8_t num_chars = *ucs2++ >> 1;

Nathan Williams 2011/03/30 22:31:54 Is it possible for the SMS UCS2 strings to start w

Eric Shienbrood 2011/03/31 20:03:59 Given what the 3GPP spec 23.040 says, can we skip

+ for (int i = 0; i < num_chars; ++i) {

+ uint16_t ucs2char = ucs2[0] << 8 | ucs2[1];

+ if (0 <= ucs2char && ucs2char <= 0x7f) {

+ str += ucs2[1];

+ } else if (0x80 <= ucs2char && ucs2char <= 0x7ff) {

Nathan Williams 2011/03/30 22:31:54 The <= part is redundant, though probably harmless

Eric Shienbrood 2011/03/31 20:03:59 Why is it redundant?

+ str += (uint8_t)(0xc0 | ((ucs2char & 0x7c0) >> 6));

+ str += (uint8_t)(0x80 | (ucs2char & 0x3f));

+ } else {

+ str += (uint8_t)(0xe0 | ((ucs2char & 0xf000) >> 12));

+ str += (uint8_t)(0x80 | ((ucs2char & 0xfc0) >> 6));

+ str += (uint8_t)(0x80 | (ucs2char & 0x3f));

+ }

+ ucs2 += 2;

+ }

+ return str;

+std::vector<uint8_t> Utf8StringToUcs2(const std::string& input)

+ std::vector<uint8_t> octets;

+ size_t length = input.length();

+ // First byte gives the length in octets of the UCS-2 string

+ // Insert a placeholder value until we know the true length.

+ octets.push_back(0);

+ // First map each UTF-8 character to its GSM7 equivalent.

Nathan Williams 2011/03/30 22:31:54 s/GSM7/UCS-2/, or ditch the comment entirely, sinc

Eric Shienbrood 2011/03/31 20:03:59 Done.

+ for (size_t i = 0; i < length; i++) {

+ char char1 = input.at(i);

+ // Check whether this is a one byte UTF-8 sequence, or the

+ // start of a two or three byte sequence.

+ if ((char1 & 0x80) == 0) {

+ octets.push_back(0);

+ octets.push_back(char1);

+ } else if ((char1 & 0xe0) == 0xc0) {

+ uint8_t char2 = input.at(++i);

+ octets.push_back((char1 >> 2) & 0x7);

+ octets.push_back(((char1 & 0x3) << 6) | (char2 & 0x3f));

+ } else if ((char1 & 0xf0) == 0xe0) {

+ uint8_t char2 = input.at(++i);

+ uint8_t char3 = input.at(++i);

+ octets.push_back(((char1 & 0xf) << 4) | ((char2 & 0x30) >> 2));

+ octets.push_back(((char2 & 0x3) << 6) | (char3 & 0x3f));

+ } else {

+ // character not representable in UCS-2, insert a space

+ octets.push_back(0);

+ octets.push_back(' ');

+ }

+ octets[0] = octets.size() - 1;

+ return octets;

void DumpHex(const uint8_t* buf, size_t size) {

size_t nlines = (size+15) / 16;

size_t limit;

« sms_message.cc ('K') | « utilities.h ('k') | utilities_unittest.cc » ('j') | no next file with comments »