| Index: source/common/ucnv2022.cpp
 | 
| diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
 | 
| index ea165eca5c783b4c64e6f773bc7489e3cac988d2..0c12dd0d7ba3dc2c144ac19e31b1bd4dc90c0c89 100644
 | 
| --- a/source/common/ucnv2022.cpp
 | 
| +++ b/source/common/ucnv2022.cpp
 | 
| @@ -1,6 +1,6 @@
 | 
|  /*
 | 
|  **********************************************************************
 | 
| -*   Copyright (C) 2000-2014, International Business Machines
 | 
| +*   Copyright (C) 2000-2015, International Business Machines
 | 
|  *   Corporation and others.  All Rights Reserved.
 | 
|  **********************************************************************
 | 
|  *   file name:  ucnv2022.cpp
 | 
| @@ -75,8 +75,10 @@
 | 
|   */
 | 
|  #endif
 | 
|  
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|  static const char SHIFT_IN_STR[]  = "\x0F";
 | 
|  // static const char SHIFT_OUT_STR[] = "\x0E";
 | 
| +#endif
 | 
|  
 | 
|  #define CR      0x0D
 | 
|  #define LF      0x0A
 | 
| @@ -152,7 +154,7 @@ typedef enum  {
 | 
|  } StateEnum;
 | 
|  
 | 
|  /* is the StateEnum charset value for a DBCS charset? */
 | 
| -#if UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if UCONFIG_ONLY_HTML_CONVERSION
 | 
|  #define IS_JP_DBCS(cs) (JISX208==(cs))
 | 
|  #else
 | 
|  #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
 | 
| @@ -169,18 +171,14 @@ typedef enum  {
 | 
|   *   all versions, not just JIS7 and JIS8.
 | 
|   * - ICU does not distinguish between different versions of JIS X 0208.
 | 
|   */
 | 
| -#if UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if UCONFIG_ONLY_HTML_CONVERSION
 | 
|  enum { MAX_JA_VERSION=0 };
 | 
|  #else
 | 
|  enum { MAX_JA_VERSION=4 };
 | 
|  #endif
 | 
|  static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
 | 
| -/* 
 | 
| - * TODO(jshin): The encoding spec has JISX212, but we don't support it.
 | 
| - * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
 | 
| - */
 | 
|      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
 | 
|      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
 | 
|      CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
 | 
| @@ -377,10 +375,8 @@ typedef enum{
 | 
|  #ifdef U_ENABLE_GENERIC_ISO_2022
 | 
|      ISO_2022=0,
 | 
|  #endif
 | 
| -#if UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| -    ISO_2022_JP=1
 | 
| -#else
 | 
|      ISO_2022_JP=1,
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|      ISO_2022_KR=2,
 | 
|      ISO_2022_CN=3
 | 
|  #endif
 | 
| @@ -414,8 +410,11 @@ namespace {
 | 
|  
 | 
|  /*const UConverterSharedData _ISO2022Data;*/
 | 
|  extern const UConverterSharedData _ISO2022JPData;
 | 
| +
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|  extern const UConverterSharedData _ISO2022KRData;
 | 
|  extern const UConverterSharedData _ISO2022CNData;
 | 
| +#endif
 | 
|  
 | 
|  }  // namespace
 | 
|  
 | 
| @@ -494,21 +493,20 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
 | 
|          if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
 | 
|              (myLocale[2]=='_' || myLocale[2]=='\0'))
 | 
|          {
 | 
| -            size_t len=0;
 | 
|              /* open the required converters and cache them */
 | 
|              if(version>MAX_JA_VERSION) {
 | 
| -                /* prevent indexing beyond jpCharsetMasks[] */
 | 
| -                myConverterData->version = version = 0;
 | 
| +                // As of ICU 55, opening a converter with an unsupported version
 | 
| +                // fails with U_MISSING_RESOURCE_ERROR. Earlier releases silently fell
 | 
| +                // back to version 0, which yielded unexpected behavior.
 | 
| +                *errorCode = U_MISSING_RESOURCE_ERROR;
 | 
| +                return;
 | 
|              }
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|              if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
 | 
|                  myConverterData->myConverterArray[ISO8859_7] =
 | 
|                      ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
 | 
|              }
 | 
| -#endif
 | 
|              myConverterData->myConverterArray[JISX208] =
 | 
|                  ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|              if(jpCharsetMasks[version]&CSM(JISX212)) {
 | 
|                  myConverterData->myConverterArray[JISX212] =
 | 
|                      ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
 | 
| @@ -521,21 +519,27 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
 | 
|                  myConverterData->myConverterArray[KSC5601] =
 | 
|                      ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
 | 
|              }
 | 
| -#endif
 | 
|  
 | 
|              /* set the function pointers to appropriate funtions */
 | 
|              cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
 | 
|              uprv_strcpy(myConverterData->locale,"ja");
 | 
|  
 | 
|              (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
 | 
| -            len = uprv_strlen(myConverterData->name);
 | 
| +            size_t len = uprv_strlen(myConverterData->name);
 | 
|              myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
 | 
|              myConverterData->name[len+1]='\0';
 | 
|          }
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|          else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
 | 
|              (myLocale[2]=='_' || myLocale[2]=='\0'))
 | 
|          {
 | 
| +            if(version>1) {
 | 
| +                // As of ICU 55, opening a converter with an unsupported version
 | 
| +                // fails with U_MISSING_RESOURCE_ERROR. Earlier releases silently fell
 | 
| +                // back to version 0, which yielded unexpected behavior.
 | 
| +                *errorCode = U_MISSING_RESOURCE_ERROR;
 | 
| +                return;
 | 
| +            }
 | 
|              const char *cnvName;
 | 
|              if(version==1) {
 | 
|                  cnvName="icu-internal-25546";
 | 
| @@ -575,6 +579,13 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
 | 
|          else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
 | 
|              (myLocale[2]=='_' || myLocale[2]=='\0'))
 | 
|          {
 | 
| +            if(version>2) {
 | 
| +                // As of ICU 55, opening a converter with an unsupported version
 | 
| +                // fails with U_MISSING_RESOURCE_ERROR. Earlier releases silently fell
 | 
| +                // back to version 0, which yielded unexpected behavior.
 | 
| +                *errorCode = U_MISSING_RESOURCE_ERROR;
 | 
| +                return;
 | 
| +            }
 | 
|  
 | 
|              /* open the required converters and cache them */
 | 
|              myConverterData->myConverterArray[GB2312_1] =
 | 
| @@ -602,7 +613,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
 | 
|                  (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
 | 
|              }
 | 
|          }
 | 
| -#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#endif  // !UCONFIG_ONLY_HTML_CONVERSION
 | 
|          else{
 | 
|  #ifdef U_ENABLE_GENERIC_ISO_2022
 | 
|              myConverterData->isFirstBuffer = TRUE;
 | 
| @@ -617,7 +628,9 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
 | 
|              /* initialize the state variables */
 | 
|              uprv_strcpy(myConverterData->name,"ISO_2022");
 | 
|  #else
 | 
| -            *errorCode = U_UNSUPPORTED_ERROR;
 | 
| +            *errorCode = U_MISSING_RESOURCE_ERROR;
 | 
| +            // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
 | 
| +            // data loading error code.
 | 
|              return;
 | 
|  #endif
 | 
|          }
 | 
| @@ -737,7 +750,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
 | 
|      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
 | 
|  };
 | 
|  
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|  /*************** to unicode *******************/
 | 
|  static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
 | 
|  /*      0                1               2               3               4               5               6               7               8               9    */
 | 
| @@ -903,7 +916,6 @@ DONE:
 | 
|                      }
 | 
|                      break;
 | 
|                  /* case SS3_STATE: not used in ISO-2022-JP-x */
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|                  case ISO8859_1:
 | 
|                  case ISO8859_7:
 | 
|                      if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 | 
| @@ -913,7 +925,6 @@ DONE:
 | 
|                          myData2022->toU2022State.cs[2]=(int8_t)tempState;
 | 
|                      }
 | 
|                      break;
 | 
| -#endif
 | 
|                  default:
 | 
|                      if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
 | 
|                          *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 | 
| @@ -925,7 +936,7 @@ DONE:
 | 
|                  }
 | 
|              }
 | 
|              break;
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|          case ISO_2022_CN:
 | 
|              {
 | 
|                  StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
 | 
| @@ -987,7 +998,7 @@ DONE:
 | 
|                  *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
 | 
|              }
 | 
|              break;
 | 
| -#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 | 
| +#endif  // !UCONFIG_ONLY_HTML_CONVERSION
 | 
|  
 | 
|          default:
 | 
|              *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 | 
| @@ -1030,6 +1041,7 @@ DONE:
 | 
|      }
 | 
|  }
 | 
|  
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|  /*Checks the characters of the buffer against valid 2022 escape sequences
 | 
|  *if the match we return a pointer to the initial start of the sequence otherwise
 | 
|  *we return sourceLimit
 | 
| @@ -1084,7 +1096,7 @@ getEndOfBuffer_2022(const char** source,
 | 
|      return mySource;
 | 
|  #endif
 | 
|  }
 | 
| -
 | 
| +#endif
 | 
|  
 | 
|  /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
 | 
|   * any future change in _MBCSFromUChar32() function should be reflected here.
 | 
| @@ -1408,16 +1420,12 @@ toUnicodeCallback(UConverter *cnv,
 | 
|  static const StateEnum jpCharsetPref[]={
 | 
|      ASCII,
 | 
|      JISX201,
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|      ISO8859_1,
 | 
| -    ISO8859_7,
 | 
| -#endif
 | 
|      JISX208,
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +    ISO8859_7,
 | 
|      JISX212,
 | 
|      GB2312,
 | 
|      KSC5601,
 | 
| -#endif
 | 
|      HWKANA_7BIT
 | 
|  };
 | 
|  
 | 
| @@ -1787,7 +1795,6 @@ getTrail:
 | 
|                          g = 0;
 | 
|                      }
 | 
|                      break;
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|                  case ISO8859_1:
 | 
|                      if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
 | 
|                          targetValue = (uint32_t)sourceChar - 0x80;
 | 
| @@ -1796,7 +1803,6 @@ getTrail:
 | 
|                          g = 2;
 | 
|                      }
 | 
|                      break;
 | 
| -#endif
 | 
|                  case HWKANA_7BIT:
 | 
|                      if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
 | 
|                          if(converterData->version==3) {
 | 
| @@ -1858,7 +1864,6 @@ getTrail:
 | 
|                          useFallback = FALSE;
 | 
|                      }
 | 
|                      break;
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|                  case ISO8859_7:
 | 
|                      /* G0 SBCS forced to 7-bit output */
 | 
|                      len2 = MBCS_SINGLE_FROM_UCHAR32(
 | 
| @@ -1873,7 +1878,6 @@ getTrail:
 | 
|                          useFallback = FALSE;
 | 
|                      }
 | 
|                      break;
 | 
| -#endif
 | 
|                  default:
 | 
|                      /* G0 DBCS */
 | 
|                      len2 = MBCS_FROM_UCHAR32_ISO2022(
 | 
| @@ -1881,7 +1885,6 @@ getTrail:
 | 
|                                  sourceChar, &value,
 | 
|                                  useFallback, MBCS_OUTPUT_2);
 | 
|                      if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|                          if(cs0 == KSC5601) {
 | 
|                              /*
 | 
|                               * Check for valid bytes for the encoding scheme.
 | 
| @@ -1893,7 +1896,6 @@ getTrail:
 | 
|                                  break;
 | 
|                              }
 | 
|                          }
 | 
| -#endif
 | 
|                          targetValue = value;
 | 
|                          len = len2;
 | 
|                          cs = cs0;
 | 
| @@ -2187,7 +2189,6 @@ escape:
 | 
|                          targetUniChar = mySourceChar;
 | 
|                      }
 | 
|                      break;
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|                  case ISO8859_1:
 | 
|                      if(mySourceChar <= 0x7f) {
 | 
|                          targetUniChar = mySourceChar + 0x80;
 | 
| @@ -2206,7 +2207,6 @@ escape:
 | 
|                      /* return from a single-shift state to the previous one */
 | 
|                      pToU2022State->g=pToU2022State->prevG;
 | 
|                      break;
 | 
| -#endif
 | 
|                  case JISX201:
 | 
|                      if(mySourceChar <= 0x7f) {
 | 
|                          targetUniChar = jisx201ToU(mySourceChar);
 | 
| @@ -2246,11 +2246,9 @@ getTrailByte:
 | 
|                              } else {
 | 
|                                  /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
 | 
|                                  mySourceChar = tmpSourceChar;
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|                                  if (cs == KSC5601) {
 | 
|                                      tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
 | 
|                                  }
 | 
| -#endif
 | 
|                                  tempBuf[0] = (char)(tmpSourceChar >> 8);
 | 
|                                  tempBuf[1] = (char)(tmpSourceChar);
 | 
|                              }
 | 
| @@ -2312,7 +2310,7 @@ endloop:
 | 
|  }
 | 
|  
 | 
|  
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|  /***************************************************************
 | 
|  *   Rules for ISO-2022-KR encoding
 | 
|  *   i) The KSC5601 designator sequence should appear only once in a file,
 | 
| @@ -3456,7 +3454,7 @@ endloop:
 | 
|      args->target = myTarget;
 | 
|      args->source = mySource;
 | 
|  }
 | 
| -#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 | 
| +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
 | 
|  
 | 
|  static void
 | 
|  _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
 | 
| @@ -3658,7 +3656,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
 | 
|          /* include JIS X 0201 which is hardcoded */
 | 
|          sa->add(sa->set, 0xa5);
 | 
|          sa->add(sa->set, 0x203e);
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
|          if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
 | 
|              /* include Latin-1 for some variants of JP */
 | 
|              sa->addRange(sa->set, 0, 0xff);
 | 
| @@ -3666,10 +3663,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
 | 
|              /* include ASCII for JP */
 | 
|              sa->addRange(sa->set, 0, 0x7f);
 | 
|          }
 | 
| -#else
 | 
| -        /* include ASCII for JP */
 | 
| -        sa->addRange(sa->set, 0, 0x7f);
 | 
| -#endif
 | 
|          if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
 | 
|              /*
 | 
|               * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
 | 
| @@ -3688,7 +3681,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
 | 
|              sa->addRange(sa->set, HWKANA_START, HWKANA_END);
 | 
|          }
 | 
|          break;
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|      case 'c':
 | 
|      case 'z':
 | 
|          /* include ASCII for CN */
 | 
| @@ -3727,7 +3720,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
 | 
|                   * corresponding to JIS X 0208.
 | 
|                   */
 | 
|                  filter=UCNV_SET_FILTER_SJIS;
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|              } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
 | 
|                         cnvData->version==0 && i==CNS_11643) {
 | 
|                  /*
 | 
| @@ -3866,7 +3859,7 @@ const UConverterSharedData _ISO2022JPData=
 | 
|  
 | 
|  }  // namespace
 | 
|  
 | 
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
 | 
| +#if !UCONFIG_ONLY_HTML_CONVERSION
 | 
|  /************* KR ***************/
 | 
|  static const UConverterImpl _ISO2022KRImpl={
 | 
|      UCNV_ISO_2022,
 | 
| @@ -3967,6 +3960,6 @@ const UConverterSharedData _ISO2022CNData=
 | 
|          UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
 | 
|  
 | 
|  }  // namespace
 | 
| -#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
 | 
| +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
 | 
|  
 | 
|  #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
 | 
| 
 |