OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #import "ios/chrome/browser/ui/util/unicode_util.h" |
| 6 |
| 7 #include "base/logging.h" |
| 8 |
| 9 namespace unicode_util { |
| 10 namespace { |
// Inclusive {first, last} UTF-16 code unit ranges for characters with R or AL
// bidirectionality. See the bidi tables in RFC 3454:
// http://www.ietf.org/rfc/rfc3454.txt
// The table is read-only lookup data, so it is declared const.
const NSUInteger kRTLRangeCount = 34;
const unichar kRTLUnicodeRanges[kRTLRangeCount][2] = {
    {0x05BE, 0x05BE}, {0x05C0, 0x05C0}, {0x05C3, 0x05C3}, {0x05D0, 0x05EA},
    {0x05F0, 0x05F4}, {0x061B, 0x061B}, {0x061F, 0x061F}, {0x0621, 0x063A},
    {0x0640, 0x064A}, {0x066D, 0x066F}, {0x0671, 0x06D5}, {0x06DD, 0x06DD},
    {0x06E5, 0x06E6}, {0x06FA, 0x06FE}, {0x0700, 0x070D}, {0x0710, 0x0710},
    {0x0712, 0x072C}, {0x0780, 0x07A5}, {0x07B1, 0x07B1}, {0x200F, 0x200F},
    {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, {0xFB38, 0xFB3C},
    {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44}, {0xFB46, 0xFBB1},
    {0xFBD3, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFC},
    {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}};
// Inclusive {first, last} UTF-16 code unit ranges for characters with L
// bidirectionality. See the bidi tables in RFC 3454:
// http://www.ietf.org/rfc/rfc3454.txt
// NOTE(review): the highest entry here is U+0386; the RFC's L table appears to
// continue past that point - confirm that stopping here is intentional.
const NSUInteger kLTRRangeCount = 17;
const unichar kLTRUnicodeRanges[kLTRRangeCount][2] = {
    {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA}, {0x00B5, 0x00B5},
    {0x00BA, 0x00BA}, {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x0220},
    {0x0222, 0x0233}, {0x0250, 0x02AD}, {0x02B0, 0x02B8}, {0x02BB, 0x02C1},
    {0x02D0, 0x02D1}, {0x02E0, 0x02E4}, {0x02EE, 0x02EE}, {0x037A, 0x037A},
    {0x0386, 0x0386}};
| 33 |
// Builds, on first call only, the character set covering every inclusive
// range listed in |kRTLUnicodeRanges|, and returns that same cached set on
// every call.
NSCharacterSet* GetRTLCharSet() {
  static dispatch_once_t rtl_once_token;
  static NSCharacterSet* g_rtl_charset = nil;
  dispatch_once(&rtl_once_token, ^{
    NSMutableCharacterSet* members = [[NSMutableCharacterSet alloc] init];
    for (NSUInteger row = 0; row < kRTLRangeCount; ++row) {
      const unichar* bounds = kRTLUnicodeRanges[row];
      const unichar first = bounds[0];
      const unichar last = bounds[1];
      // Table bounds are inclusive, so the range length is last - first + 1.
      [members addCharactersInRange:NSMakeRange(first, last + 1 - first)];
    }
    g_rtl_charset = members;
  });
  return g_rtl_charset;
}
| 51 |
// Returns the character set created from the inclusive unicode value ranges in
// |kLTRUnicodeRanges|. The set is built once, on first use, and the same
// instance is returned by every later call.
NSCharacterSet* GetLTRCharSet() {
  static NSCharacterSet* g_ltr_charset = nil;
  static dispatch_once_t ltr_once_token;
  // dispatch_once() takes the address of its once-token.
  dispatch_once(&ltr_once_token, ^{
    NSMutableCharacterSet* ltr_charset = [[NSMutableCharacterSet alloc] init];
    for (NSUInteger range_idx = 0; range_idx < kLTRRangeCount; ++range_idx) {
      unichar range_begin = kLTRUnicodeRanges[range_idx][0];
      unichar range_end = kLTRUnicodeRanges[range_idx][1];
      // Both bounds are inclusive, hence the + 1 in the range length.
      NSRange ltr_range = NSMakeRange(range_begin, range_end + 1 - range_begin);
      [ltr_charset addCharactersInRange:ltr_range];
    }
    g_ltr_charset = ltr_charset;
  });
  return g_ltr_charset;
}
| 69 } // namespace |
| 70 |
// Returns true when |c| is a member of the set returned by GetRTLCharSet().
bool IsCharRTL(unichar c) {
  NSCharacterSet* rtl_characters = GetRTLCharSet();
  return [rtl_characters characterIsMember:c];
}
| 74 |
// Returns true when |c| is a member of the set returned by GetLTRCharSet().
bool IsCharLTR(unichar c) {
  NSCharacterSet* ltr_characters = GetLTRCharSet();
  return [ltr_characters characterIsMember:c];
}
| 78 |
// Scans |string| from the start and reports the direction of the first UTF-16
// code unit that IsCharRTL() or IsCharLTR() recognizes. Returns
// NSWritingDirectionNatural when no code unit matches either test, which
// includes an empty or nil |string|.
NSWritingDirection UnicodeWritingDirectionForString(NSString* string) {
  const NSUInteger code_unit_count = [string length];
  for (NSUInteger position = 0; position < code_unit_count; ++position) {
    const unichar code_unit = [string characterAtIndex:position];
    // The RTL test runs first, matching the order callers have always seen.
    if (IsCharRTL(code_unit)) {
      return NSWritingDirectionRightToLeft;
    }
    if (IsCharLTR(code_unit)) {
      return NSWritingDirectionLeftToRight;
    }
  }
  return NSWritingDirectionNatural;
}
| 89 |
// Returns a string in which every UTF-16 code unit of |string| is written as a
// backslash, the letter "u", and exactly four lowercase hex digits (for
// example, "a" becomes the six characters \u0061). An empty or nil |string|
// yields an empty string.
NSString* GetEscapedUnicodeStringForString(NSString* string) {
  NSMutableString* unicode_string = [NSMutableString string];
  const NSUInteger length = [string length];
  for (NSUInteger i = 0; i < length; ++i) {
    unichar c = [string characterAtIndex:i];
    // unichars are 16-bit unsigned integers, and a bare %x prints only as many
    // digits as the value needs. Unicode escapes in NSString literals require
    // exactly 4 hex digits after the "\u", so %04x zero-pads to that width.
    [unicode_string appendFormat:@"\\u%04x", (unsigned int)c];
  }
  return unicode_string;
}
| 111 |
| 112 } // namespace unicode_util |
OLD | NEW |