utilities.cc - Issue 6740005: Handle UCS-2 data coding scheme for SMS messages.

Side by Side Diff: utilities.cc

Issue 6740005: Handle UCS-2 data coding scheme for SMS messages. (Closed) Base URL: ssh://gitrw.chromium.org:9222/cromo.git@master

Patch Set: Created 9 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2010 The Chromium OS Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium OS Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Plugin tests link against this file, but not against the rest of	5 // Plugin tests link against this file, but not against the rest of

6 // cromo. Therefore this file should not have dependencies on the	6 // cromo. Therefore this file should not have dependencies on the

7 // rest of cromo.	7 // rest of cromo.

8 #include "utilities.h"	8 #include "utilities.h"

9	9

10 #include <glog/logging.h>	10 #include <glog/logging.h>

11 #include <stdio.h>	11 #include <stdio.h>

(...skipping 254 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
266 octet \|= septets.at(k+1) << (7-shift);	266 octet \|= septets.at(k+1) << (7-shift);

267 octets.push_back(octet);	267 octets.push_back(octet);

268 }	268 }

269 if (++shift == 8)	269 if (++shift == 8)

270 shift = 0;	270 shift = 0;

271 }	271 }

272	272

273 return octets;	273 return octets;

274 }	274 }

275	275

	// Converts a length-prefixed UCS-2 buffer into a UTF-8 encoded string.
	//
	// |ucs2| points at a single length octet giving the number of payload
	// octets that follow. The payload is read as big-endian 16-bit code units;
	// an odd trailing octet is ignored. A null |ucs2| or a zero length yields
	// an empty string.
	//
	// No byte-order mark is looked for. Review discussion on this change
	// (citing 3GPP spec 23.040) chose to assume big-endian data without a BOM
	// until a need for anything else is demonstrated.
	//
	// A high surrogate followed by a low surrogate is combined into one
	// supplementary code point and written as a 4-byte UTF-8 sequence. A
	// surrogate that is not part of such a pair is replaced by U+FFFD so that
	// the returned string is always well-formed UTF-8.
	std::string Ucs2ToUtf8String(const uint8_t *ucs2) {
	  std::string str;
	  if (!ucs2)
	    return str;

	  // The length octet counts octets; every code unit occupies two of them.
	  const size_t num_units = *ucs2++ >> 1;
	  str.reserve(num_units * 3);

	  for (size_t i = 0; i < num_units; ++i) {
	    uint32_t code_point = (ucs2[2 * i] << 8) | ucs2[2 * i + 1];

	    // Try to pair a high surrogate with the code unit that follows it.
	    if (0xd800 <= code_point && code_point <= 0xdbff && i + 1 < num_units) {
	      const uint32_t low = (ucs2[2 * i + 2] << 8) | ucs2[2 * i + 3];
	      if (0xdc00 <= low && low <= 0xdfff) {
	        code_point = 0x10000 + ((code_point - 0xd800) << 10) + (low - 0xdc00);
	        ++i;  // The low surrogate has been consumed as well.
	      }
	    }
	    // Anything still in the surrogate range is unpaired; encoding it
	    // directly would produce invalid UTF-8.
	    if (0xd800 <= code_point && code_point <= 0xdfff)
	      code_point = 0xfffd;

	    if (code_point <= 0x7f) {
	      str += static_cast<char>(code_point);
	    } else if (code_point <= 0x7ff) {
	      str += static_cast<char>(0xc0 | (code_point >> 6));
	      str += static_cast<char>(0x80 | (code_point & 0x3f));
	    } else if (code_point <= 0xffff) {
	      str += static_cast<char>(0xe0 | (code_point >> 12));
	      str += static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
	      str += static_cast<char>(0x80 | (code_point & 0x3f));
	    } else {
	      str += static_cast<char>(0xf0 | (code_point >> 18));
	      str += static_cast<char>(0x80 | ((code_point >> 12) & 0x3f));
	      str += static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
	      str += static_cast<char>(0x80 | (code_point & 0x3f));
	    }
	  }
	  return str;
	}

	296

	// Converts a UTF-8 string into a length-prefixed, big-endian UCS-2 buffer.
	//
	// The first octet of the result is the number of UCS-2 octets that follow
	// it. Each character of |input| produces exactly two octets, high octet
	// first. The prefix is a single octet, so it wraps if the encoded text is
	// longer than 255 octets.
	//
	// Anything that cannot be expressed as one UCS-2 code unit is replaced by
	// a single space: a 4-byte UTF-8 sequence (a code point above U+FFFF), a
	// stray continuation byte, an invalid lead byte, or a multi-byte sequence
	// that is cut short by the end of the string or by a non-continuation
	// byte. In the last case the offending byte is not consumed, so it is
	// decoded on its own afterwards.
	std::vector<uint8_t> Utf8StringToUcs2(const std::string& input)
	{
	  std::vector<uint8_t> octets;
	  const size_t length = input.length();

	  octets.reserve(1 + 2 * length);
	  // Placeholder for the length octet; filled in once the size is known.
	  octets.push_back(0);

	  size_t i = 0;
	  while (i < length) {
	    const uint8_t lead = static_cast<uint8_t>(input[i++]);
	    uint32_t code_point = 0;
	    size_t trailing = 0;  // Continuation bytes announced by the lead byte.
	    bool representable = true;

	    if ((lead & 0x80) == 0) {
	      code_point = lead;
	    } else if ((lead & 0xe0) == 0xc0) {
	      code_point = lead & 0x1f;
	      trailing = 1;
	    } else if ((lead & 0xf0) == 0xe0) {
	      code_point = lead & 0x0f;
	      trailing = 2;
	    } else if ((lead & 0xf8) == 0xf0) {
	      // Code point above U+FFFF: still walk its continuation bytes so
	      // that the whole character collapses into one substitute.
	      trailing = 3;
	      representable = false;
	    } else {
	      // Stray continuation byte or a lead byte that UTF-8 does not define.
	      representable = false;
	    }

	    for (; trailing > 0; --trailing) {
	      if (i >= length) {
	        representable = false;
	        break;
	      }
	      const uint8_t next = static_cast<uint8_t>(input[i]);
	      if ((next & 0xc0) != 0x80) {
	        representable = false;
	        break;
	      }
	      // Every continuation byte contributes its low six bits.
	      code_point = (code_point << 6) | (next & 0x3f);
	      ++i;
	    }

	    if (!representable)
	      code_point = ' ';
	    octets.push_back(static_cast<uint8_t>(code_point >> 8));
	    octets.push_back(static_cast<uint8_t>(code_point & 0xff));
	  }

	  octets[0] = static_cast<uint8_t>(octets.size() - 1);
	  return octets;
	}

	331

// DumpHex writes the |size| bytes at |buf| to the log as hexadecimal text,
// emitting one LOG(INFO) message per group of up to 16 bytes. Every message
// starts with the offset of its first byte, printed in hex and zero-filled
// to eight digits, followed by each byte as a space-separated two-digit hex
// value. A |size| of zero produces no output.
// (This is unchanged diff context: each row below shows the same statement
// twice, once under its OLD line number and once under its NEW one.)
276 void DumpHex(const uint8_t* buf, size_t size) {	332 void DumpHex(const uint8_t* buf, size_t size) {

// Number of 16-byte rows, rounded up so a partial final row is included.
277 size_t nlines = (size+15) / 16;	333 size_t nlines = (size+15) / 16;

278 size_t limit;	334 size_t limit;

279	335

280 for (size_t i = 0; i < nlines; i++) {	336 for (size_t i = 0; i < nlines; i++) {

281 std::ostringstream ostr;	337 std::ostringstream ostr;

282 ostr << std::hex;	338 ostr << std::hex;

// Fill and width set here apply to the offset inserted further down.
283 ostr.fill('0');	339 ostr.fill('0');

284 ostr.width(8);	340 ostr.width(8);

// |limit| is the number of bytes on this row: 16, or whatever remains on
// the final row.
285 if (i*16 + 16 >= size)	341 if (i*16 + 16 >= size)

286 limit = size - i*16;	342 limit = size - i*16;

287 else	343 else

288 limit = 16;	344 limit = 16;

// Offset of the first byte of this row.
289 ostr << i*16 << " ";	345 ostr << i*16 << " ";

290 ostr.fill('0');	346 ostr.fill('0');

291 ostr.width(2);	347 ostr.width(2);

292 for (size_t j = 0; j < limit; j++) {	348 for (size_t j = 0; j < limit; j++) {

293 uint8_t byte = buf[i*16+j];	349 uint8_t byte = buf[i*16+j];

// Width is reset to 0 for the separator and to 2 for the byte value; the
// byte is widened to unsigned int so it is printed as a number.
294 ostr << std::setw(0) << " " << std::setw(2) << std::setfill('0')	350 ostr << std::setw(0) << " " << std::setw(2) << std::setfill('0')

295 << static_cast<unsigned int>(byte);	351 << static_cast<unsigned int>(byte);

296 }	352 }

297 LOG(INFO) << ostr.str();	353 LOG(INFO) << ostr.str();

298 }	354 }

299 }	355 }

300	356

301 } // namespace utilities	357 } // namespace utilities

OLD	NEW

« sms_message.cc ('K') | « utilities.h ('k') | utilities_unittest.cc » ('j') | no next file with comments »