icu46/source/tools/tzcode/tz2icu.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/tools/tzcode/tz2icu.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1

	2 /*

	3 **********************************************************************

	4 * Copyright (c) 2003-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 **********************************************************************

	7 * Author: Alan Liu

	8 * Created: July 10 2003

	9 * Since: ICU 2.8

	10 **********************************************************************

	11 */

	12 #include "tzfile.h" // from Olson tzcode archive, copied to this dir

	13

	14 #ifdef WIN32

	15

	16 #include <windows.h>

	17 #undef min // windows.h/STL conflict

	18 #undef max // windows.h/STL conflict

	19 // "identifier was truncated to 'number' characters" warning

	20 #pragma warning(disable: 4786)

	21

	22 #else

	23

	24 #include <unistd.h>

	25 #include <stdio.h>

	26 #include <dirent.h>

	27 #include <string.h>

	28 #include <sys/stat.h>

	29

	30 #endif

	31

	32 #include <algorithm>

	33 #include <cassert>

	34 #include <ctime>

	35 #include <fstream>

	36 #include <iomanip>

	37 #include <iostream>

	38 #include <iterator>

	39 #include <limits>

	40 #include <map>

	41 #include <set>

	42 #include <sstream>

	43 #include <sstream>

	44 #include <stdexcept>

	45 #include <string>

	46 #include <vector>

	47

	48 #include "tz2icu.h"

	49 #include "unicode/uversion.h"

	50

	51 using namespace std;

	52

	53 bool ICU44PLUS = TRUE;

	54 string TZ_RESOURCE_NAME = ICU_TZ_RESOURCE;

	55

	56 //--------------------------------------------------------------------

	57 // Time utilities

	58 //--------------------------------------------------------------------

	59

	60 const int64_t SECS_PER_YEAR = 31536000; // 365 days

	61 const int64_t SECS_PER_LEAP_YEAR = 31622400; // 366 days

	62 const int64_t LOWEST_TIME32 = (int64_t)((int32_t)0x80000000);

	63 const int64_t HIGHEST_TIME32 = (int64_t)((int32_t)0x7fffffff);

	64

	65 bool isLeap(int32_t y) {

	66 return (y%4 == 0) && ((y%100 != 0) \|\| (y%400 == 0)); // Gregorian

	67 }

	68

	69 int64_t secsPerYear(int32_t y) {

	70 return isLeap(y) ? SECS_PER_LEAP_YEAR : SECS_PER_YEAR;

	71 }

	72

	73 /**

	74 * Given a calendar year, return the GMT epoch seconds for midnight

	75 * GMT of January 1 of that year. yearToSeconds(1970) == 0.

	76 */

	77 int64_t yearToSeconds(int32_t year) {

	78 // inefficient but foolproof

	79 int64_t s = 0;

	80 int32_t y = 1970;

	81 while (y < year) {

	82 s += secsPerYear(y++);

	83 }

	84 while (y > year) {

	85 s -= secsPerYear(--y);

	86 }

	87 return s;

	88 }

	89

	90 /**

	91 * Given 1970 GMT epoch seconds, return the calendar year containing

	92 * that time. secondsToYear(0) == 1970.

	93 */

	94 int32_t secondsToYear(int64_t seconds) {

	95 // inefficient but foolproof

	96 int32_t y = 1970;

	97 int64_t s = 0;

	98 if (seconds >= 0) {

	99 for (;;) {

	100 s += secsPerYear(y++);

	101 if (s > seconds) break;

	102 }

	103 --y;

	104 } else {

	105 for (;;) {

	106 s -= secsPerYear(--y);

	107 if (s <= seconds) break;

	108 }

	109 }

	110 return y;

	111 }

	112

	113 //--------------------------------------------------------------------

	114 // Types

	115 //--------------------------------------------------------------------

	116

	117 struct FinalZone;

	118 struct FinalRule;

	119 struct SimplifiedZoneType;

	120

	121 // A transition from one ZoneType to another

	122 // Minimal size = 5 bytes (4+1)

	123 struct Transition {

	124 int64_t time; // seconds, 1970 epoch

	125 int32_t type; // index into 'ZoneInfo.types' 0..255

	126 Transition(int64_t _time, int32_t _type) {

	127 time = _time;

	128 type = _type;

	129 }

	130 };

	131

	132 // A behavior mode (what zic calls a 'type') of a time zone.

	133 // Minimal size = 6 bytes (4+1+3bits)

	134 // SEE: SimplifiedZoneType

	135 struct ZoneType {

	136 int64_t rawoffset; // raw seconds offset from GMT

	137 int64_t dstoffset; // dst seconds offset from GMT

	138

	139 // We don't really need any of the following, but they are

	140 // retained for possible future use. See SimplifiedZoneType.

	141 int32_t abbr; // index into ZoneInfo.abbrs 0..n-1

	142 bool isdst;

	143 bool isstd;

	144 bool isgmt;

	145

	146 ZoneType(const SimplifiedZoneType&); // used by optimizeTypeList

	147

	148 ZoneType() : rawoffset(-1), dstoffset(-1), abbr(-1) {}

	149

	150 // A restricted equality, of just the raw and dst offset

	151 bool matches(const ZoneType& other) {

	152 return rawoffset == other.rawoffset &&

	153 dstoffset == other.dstoffset;

	154 }

	155 };

	156

	157 // A collection of transitions from one ZoneType to another, together

	158 // with a list of the ZoneTypes. A ZoneInfo object may have a long

	159 // list of transitions between a smaller list of ZoneTypes.

	160 //

	161 // This object represents the contents of a single zic-created

	162 // zoneinfo file.

	163 struct ZoneInfo {

	164 vector<Transition> transitions;

	165 vector<ZoneType> types;

	166 vector<string> abbrs;

	167

	168 string finalRuleID;

	169 int32_t finalOffset;

	170 int32_t finalYear; // -1 if none

	171

	172 // If this is an alias, then all other fields are meaningless, and

	173 // this field will point to the "real" zone 0..n-1.

	174 int32_t aliasTo; // -1 if this is a "real" zone

	175

	176 // If there are aliases TO this zone, then the following set will

	177 // contain their index numbers (each index >= 0).

	178 set<int32_t> aliases;

	179

	180 ZoneInfo() : finalYear(-1), aliasTo(-1) {}

	181

	182 void mergeFinalData(const FinalZone& fz);

	183

	184 void optimizeTypeList();

	185

	186 // Set this zone to be an alias TO another zone.

	187 void setAliasTo(int32_t index);

	188

	189 // Clear the list of aliases OF this zone.

	190 void clearAliases();

	191

	192 // Add an alias to the list of aliases OF this zone.

	193 void addAlias(int32_t index);

	194

	195 // Is this an alias to another zone?

	196 bool isAlias() const {

	197 return aliasTo >= 0;

	198 }

	199

	200 // Retrieve alias list

	201 const set<int32_t>& getAliases() const {

	202 return aliases;

	203 }

	204

	205 void print(ostream& os, const string& id) const;

	206 };

	207

	208 void ZoneInfo::clearAliases() {

	209 assert(aliasTo < 0);

	210 aliases.clear();

	211 }

	212

	213 void ZoneInfo::addAlias(int32_t index) {

	214 assert(aliasTo < 0 && index >= 0 && aliases.find(index) == aliases.end());

	215 aliases.insert(index);

	216 }

	217

	218 void ZoneInfo::setAliasTo(int32_t index) {

	219 assert(index >= 0);

	220 assert(aliases.size() == 0);

	221 aliasTo = index;

	222 }

	223

	224 typedef map<string, ZoneInfo> ZoneMap;

	225

	226 typedef ZoneMap::const_iterator ZoneMapIter;

	227

	228 //--------------------------------------------------------------------

	229 // ZONEINFO

	230 //--------------------------------------------------------------------

	231

	232 // Global map holding all our ZoneInfo objects, indexed by id.

	233 ZoneMap ZONEINFO;

	234

	235 //--------------------------------------------------------------------

	236 // zoneinfo file parsing

	237 //--------------------------------------------------------------------

	238

	239 // Read zic-coded 32-bit integer from file

	240 int64_t readcoded(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(),

	241 int64_t maxv=numeric_limits<int64_t>::max()) {

	242 unsigned char buf[4]; // must be UNSIGNED

	243 int64_t val=0;

	244 file.read((char*)buf, 4);

	245 for(int32_t i=0,shift=24;i<4;++i,shift-=8) {

	246 val \|= buf[i] << shift;

	247 }

	248 if (val < minv \|\| val > maxv) {

	249 ostringstream os;

	250 os << "coded value out-of-range: " << val << ", expected ["

	251 << minv << ", " << maxv << "]";

	252 throw out_of_range(os.str());

	253 }

	254 return val;

	255 }

	256

	257 // Read zic-coded 64-bit integer from file

	258 int64_t readcoded64(ifstream& file, int64_t minv=numeric_limits<int64_t>::min(),

	259 int64_t maxv=numeric_limits<int64_t>::max()) {

	260 unsigned char buf[8]; // must be UNSIGNED

	261 int64_t val=0;

	262 file.read((char*)buf, 8);

	263 for(int32_t i=0,shift=56;i<8;++i,shift-=8) {

	264 val \|= (int64_t)buf[i] << shift;

	265 }

	266 if (val < minv \|\| val > maxv) {

	267 ostringstream os;

	268 os << "coded value out-of-range: " << val << ", expected ["

	269 << minv << ", " << maxv << "]";

	270 throw out_of_range(os.str());

	271 }

	272 return val;

	273 }

	274

	275 // Read a boolean value

	276 bool readbool(ifstream& file) {

	277 char c;

	278 file.read(&c, 1);

	279 if (c!=0 && c!=1) {

	280 ostringstream os;

	281 os << "boolean value out-of-range: " << (int32_t)c;

	282 throw out_of_range(os.str());

	283 }

	284 return (c!=0);

	285 }

	286

	287 /**

	288 * Read the zoneinfo file structure (see tzfile.h) into a ZoneInfo

	289 * @param file an already-open file stream

	290 */

	291 void readzoneinfo(ifstream& file, ZoneInfo& info, bool is64bitData) {

	292 int32_t i;

	293

	294 // Check for TZ_ICU_MAGIC signature at file start. If we get a

	295 // signature mismatch, it means we're trying to read a file which

	296 // isn't a ICU-modified-zic-created zoneinfo file. Typically this

	297 // means the user is passing in a "normal" zoneinfo directory, or

	298 // a zoneinfo directory that is polluted with other files, or that

	299 // the user passed in the wrong directory.

	300 char buf[32];

	301 file.read(buf, 4);

	302 if (strncmp(buf, TZ_ICU_MAGIC, 4) != 0) {

	303 throw invalid_argument("TZ_ICU_MAGIC signature missing");

	304 }

	305 // skip additional Olson byte version

	306 file.read(buf, 1);

	307 // if '\0', we have just one copy of data, if '2', there is additional

	308 // 64 bit version at the end.

	309 if(buf[0]!=0 && buf[0]!='2') {

	310 throw invalid_argument("Bad Olson version info");

	311 }

	312

	313 // Read reserved bytes. The first of these will be a version byte.

	314 file.read(buf, 15);

	315 if ((ICUZoneinfoVersion)&buf != TZ_ICU_VERSION) {

	316 throw invalid_argument("File version mismatch");

	317 }

	318

	319 // Read array sizes

	320 int64_t isgmtcnt = readcoded(file, 0);

	321 int64_t isdstcnt = readcoded(file, 0);

	322 int64_t leapcnt = readcoded(file, 0);

	323 int64_t timecnt = readcoded(file, 0);

	324 int64_t typecnt = readcoded(file, 0);

	325 int64_t charcnt = readcoded(file, 0);

	326

	327 // Confirm sizes that we assume to be equal. These assumptions

	328 // are drawn from a reading of the zic source (2003a), so they

	329 // should hold unless the zic source changes.

	330 if (isgmtcnt != typecnt \|\| isdstcnt != typecnt) {

	331 throw invalid_argument("count mismatch between tzh_ttisgmtcnt, tzh_ttisd stcnt, tth_typecnt");

	332 }

	333

	334 // Used temporarily to store transition times and types. We need

	335 // to do this because the times and types are stored in two

	336 // separate arrays.

	337 vector<int64_t> transitionTimes(timecnt, -1); // temporary

	338 vector<int32_t> transitionTypes(timecnt, -1); // temporary

	339

	340 // Read transition times

	341 for (i=0; i<timecnt; ++i) {

	342 if (is64bitData) {

	343 transitionTimes[i] = readcoded64(file);

	344 } else {

	345 transitionTimes[i] = readcoded(file);

	346 }

	347 }

	348

	349 // Read transition types

	350 for (i=0; i<timecnt; ++i) {

	351 unsigned char c;

	352 file.read((char*) &c, 1);

	353 int32_t t = (int32_t) c;

	354 if (t < 0 \|\| t >= typecnt) {

	355 ostringstream os;

	356 os << "illegal type: " << t << ", expected [0, " << (typecnt-1) << " ]";

	357 throw out_of_range(os.str());

	358 }

	359 transitionTypes[i] = t;

	360 }

	361

	362 // Build transitions vector out of corresponding times and types.

	363 bool insertInitial = false;

	364 if (is64bitData && !ICU44PLUS) {

	365 if (timecnt > 0) {

	366 int32_t minidx = -1;

	367 for (i=0; i<timecnt; ++i) {

	368 if (transitionTimes[i] < LOWEST_TIME32) {

	369 if (minidx == -1 \|\| transitionTimes[i] > transitionTimes[min idx]) {

	370 // Preserve the latest transition before the 32bit minim um time

	371 minidx = i;

	372 }

	373 } else if (transitionTimes[i] > HIGHEST_TIME32) {

	374 // Skipping the rest of the transition data. We cannot put such

	375 // transitions into zoneinfo.res, because data is limited to singed

	376 // 32bit int by the ICU resource bundle.

	377 break;

	378 } else {

	379 info.transitions.push_back(Transition(transitionTimes[i], tr ansitionTypes[i]));

	380 }

	381 }

	382

	383 if (minidx != -1) {

	384 // If there are any transitions before the 32bit minimum time,

	385 // put the type information with the 32bit minimum time

	386 vector<Transition>::iterator itr = info.transitions.begin();

	387 info.transitions.insert(itr, Transition(LOWEST_TIME32, transitio nTypes[minidx]));

	388 } else {

	389 // Otherwise, we need insert the initial type later

	390 insertInitial = true;

	391 }

	392 }

	393 } else {

	394 for (i=0; i<timecnt; ++i) {

	395 info.transitions.push_back(Transition(transitionTimes[i], transition Types[i]));

	396 }

	397 }

	398

	399 // Read types (except for the isdst and isgmt flags, which come later (why?? ))

	400 for (i=0; i<typecnt; ++i) {

	401 ZoneType type;

	402

	403 type.rawoffset = readcoded(file);

	404 type.dstoffset = readcoded(file);

	405 type.isdst = readbool(file);

	406

	407 unsigned char c;

	408 file.read((char*) &c, 1);

	409 type.abbr = (int32_t) c;

	410

	411 if (type.isdst != (type.dstoffset != 0)) {

	412 throw invalid_argument("isdst does not reflect dstoffset");

	413 }

	414

	415 info.types.push_back(type);

	416 }

	417

	418 assert(info.types.size() == (unsigned) typecnt);

	419

	420 if (insertInitial) {

	421 assert(timecnt > 0);

	422 assert(typecnt > 0);

	423

	424 int32_t initialTypeIdx = -1;

	425

	426 // Check if the first type is not dst

	427 if (info.types.at(0).dstoffset != 0) {

	428 // Initial type's rawoffset is same with the rawoffset after the

	429 // first transition, but no DST is observed.

	430 int64_t rawoffset0 = (info.types.at(info.transitions.at(0).type)).ra woffset;

	431 // Look for matching type

	432 for (i=0; i<(int32_t)info.types.size(); ++i) {

	433 if (info.types.at(i).rawoffset == rawoffset0

	434 && info.types.at(i).dstoffset == 0) {

	435 initialTypeIdx = i;

	436 break;

	437 }

	438 }

	439 } else {

	440 initialTypeIdx = 0;

	441 }

	442 assert(initialTypeIdx >= 0);

	443 // Add the initial type associated with the lowest int32 time

	444 vector<Transition>::iterator itr = info.transitions.begin();

	445 info.transitions.insert(itr, Transition(LOWEST_TIME32, initialTypeIdx));

	446 }

	447

	448

	449 // Read the abbreviation string

	450 if (charcnt) {

	451 // All abbreviations are concatenated together, with a 0 at

	452 // the end of each abbr.

	453 char* str = new char[charcnt + 8];

	454 file.read(str, charcnt);

	455

	456 // Split abbreviations apart into individual strings. Record

	457 // offset of each abbr in a vector.

	458 vector<int32_t> abbroffset;

	459 char *limit=str+charcnt;

	460 for (char* p=str; p<limit; ++p) {

	461 char* start = p;

	462 while (*p != 0) ++p;

	463 info.abbrs.push_back(string(start, p-start));

	464 abbroffset.push_back(start-str);

	465 }

	466

	467 // Remap all the abbrs. Old value is offset into concatenated

	468 // raw abbr strings. New value is index into vector of

	469 // strings. E.g., 0,5,10,14 => 0,1,2,3.

	470

	471 // Keep track of which abbreviations get used.

	472 vector<bool> abbrseen(abbroffset.size(), false);

	473

	474 for (vector<ZoneType>::iterator it=info.types.begin();

	475 it!=info.types.end();

	476 ++it) {

	477 vector<int32_t>::const_iterator x=

	478 find(abbroffset.begin(), abbroffset.end(), it->abbr);

	479 if (x==abbroffset.end()) {

	480 // TODO: Modify code to add a new string to the end of

	481 // the abbr list when a middle offset is given, e.g.,

	482 // "abcdef" where * == '\0', take offset of 1 and

	483 // make the array "abc", "def", "bc", and translate 1

	484 // => 2. NOT CRITICAL since we don't even use the

	485 // abbr at this time.

	486 #if 0

	487 // TODO: Re-enable this warning if we start using

	488 // the Olson abbr data, or if the above TODO is completed.

	489 ostringstream os;

	490 os << "Warning: unusual abbr offset " << it->abbr

	491 << ", expected one of";

	492 for (vector<int32_t>::const_iterator y=abbroffset.begin();

	493 y!=abbroffset.end(); ++y) {

	494 os << ' ' << *y;

	495 }

	496 cerr << os.str() << "; using 0" << endl;

	497 #endif

	498 it->abbr = 0;

	499 } else {

	500 int32_t index = x - abbroffset.begin();

	501 it->abbr = index;

	502 abbrseen[index] = true;

	503 }

	504 }

	505

	506 for (int32_t ii=0;ii<(int32_t) abbrseen.size();++ii) {

	507 if (!abbrseen[ii]) {

	508 cerr << "Warning: unused abbreviation: " << ii << endl;

	509 }

	510 }

	511 }

	512

	513 // Read leap second info, if any.

	514 // * We discard leap second data. *

	515 for (i=0; i<leapcnt; ++i) {

	516 readcoded(file); // transition time

	517 readcoded(file); // total correction after above

	518 }

	519

	520 // Read isstd flags

	521 for (i=0; i<typecnt; ++i) info.types[i].isstd = readbool(file);

	522

	523 // Read isgmt flags

	524 for (i=0; i<typecnt; ++i) info.types[i].isgmt = readbool(file);

	525 }

	526

	527 //--------------------------------------------------------------------

	528 // Directory and file reading

	529 //--------------------------------------------------------------------

	530

	531 /**

	532 * Process a single zoneinfo file, adding the data to ZONEINFO

	533 * @param path the full path to the file, e.g., ".\zoneinfo\America\Los_Angeles"

	534 * @param id the zone ID, e.g., "America/Los_Angeles"

	535 */

	536 void handleFile(string path, string id) {

	537 // Check for duplicate id

	538 if (ZONEINFO.find(id) != ZONEINFO.end()) {

	539 ostringstream os;

	540 os << "duplicate zone ID: " << id;

	541 throw invalid_argument(os.str());

	542 }

	543

	544 ifstream file(path.c_str(), ios::in \| ios::binary);

	545 if (!file) {

	546 throw invalid_argument("can't open file");

	547 }

	548

	549 // eat 32bit data part

	550 ZoneInfo info;

	551 readzoneinfo(file, info, false);

	552

	553 // Check for errors

	554 if (!file) {

	555 throw invalid_argument("read error");

	556 }

	557

	558 // we only use 64bit part

	559 ZoneInfo info64;

	560 readzoneinfo(file, info64, true);

	561

	562 bool alldone = false;

	563 int64_t eofPos = (int64_t) file.tellg();

	564

	565 // '\n' + <envvar string> + '\n' after the 64bit version data

	566 char ch = file.get();

	567 if (ch == 0x0a) {

	568 bool invalidchar = false;

	569 while (file.get(ch)) {

	570 if (ch == 0x0a) {

	571 break;

	572 }

	573 if (ch < 0x20) {

	574 // must be printable ascii

	575 invalidchar = true;

	576 break;

	577 }

	578 }

	579 if (!invalidchar) {

	580 eofPos = (int64_t) file.tellg();

	581 file.seekg(0, ios::end);

	582 eofPos = eofPos - (int64_t) file.tellg();

	583 if (eofPos == 0) {

	584 alldone = true;

	585 }

	586 }

	587 }

	588 if (!alldone) {

	589 ostringstream os;

	590 os << (-eofPos) << " unprocessed bytes at end";

	591 throw invalid_argument(os.str());

	592 }

	593

	594 ZONEINFO[id] = info64;

	595 }

	596

	597 /**

	598 * Recursively scan the given directory, calling handleFile() for each

	599 * file in the tree. The user should call with the root directory and

	600 * a prefix of "". The function will call itself with non-empty

	601 * prefix values.

	602 */

	603 #ifdef WIN32

	604

	605 void scandir(string dirname, string prefix="") {

	606 HANDLE hList;

	607 WIN32_FIND_DATA FileData;

	608

	609 // Get the first file

	610 hList = FindFirstFile((dirname + "\\*").c_str(), &FileData);

	611 if (hList == INVALID_HANDLE_VALUE) {

	612 cerr << "Error: Invalid directory: " << dirname << endl;

	613 exit(1);

	614 }

	615 for (;;) {

	616 string name(FileData.cFileName);

	617 string path(dirname + "\\" + name);

	618 if (FileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {

	619 if (name != "." && name != "..") {

	620 scandir(path, prefix + name + "/");

	621 }

	622 } else {

	623 try {

	624 string id = prefix + name;

	625 handleFile(path, id);

	626 } catch (const exception& e) {

	627 cerr << "Error: While processing \"" << path << "\", "

	628 << e.what() << endl;

	629 exit(1);

	630 }

	631 }

	632

	633 if (!FindNextFile(hList, &FileData)) {

	634 if (GetLastError() == ERROR_NO_MORE_FILES) {

	635 break;

	636 } // else...?

	637 }

	638 }

	639 FindClose(hList);

	640 }

	641

	642 #else

	643

	644 void scandir(string dir, string prefix="") {

	645 DIR *dp;

	646 struct dirent *dir_entry;

	647 struct stat stat_info;

	648 char pwd[512];

	649 vector<string> subdirs;

	650 vector<string> subfiles;

	651

	652 if ((dp = opendir(dir.c_str())) == NULL) {

	653 cerr << "Error: Invalid directory: " << dir << endl;

	654 exit(1);

	655 }

	656 if (!getcwd(pwd, sizeof(pwd))) {

	657 cerr << "Error: Directory name too long" << endl;

	658 exit(1);

	659 }

	660 chdir(dir.c_str());

	661 while ((dir_entry = readdir(dp)) != NULL) {

	662 string name = dir_entry->d_name;

	663 string path = dir + "/" + name;

	664 lstat(dir_entry->d_name,&stat_info);

	665 if (S_ISDIR(stat_info.st_mode)) {

	666 if (name != "." && name != "..") {

	667 subdirs.push_back(path);

	668 subdirs.push_back(prefix + name + "/");

	669 // scandir(path, prefix + name + "/");

	670 }

	671 } else {

	672 try {

	673 string id = prefix + name;

	674 subfiles.push_back(path);

	675 subfiles.push_back(id);

	676 // handleFile(path, id);

	677 } catch (const exception& e) {

	678 cerr << "Error: While processing \"" << path << "\", "

	679 << e.what() << endl;

	680 exit(1);

	681 }

	682 }

	683 }

	684 closedir(dp);

	685 chdir(pwd);

	686

	687 for(int32_t i=0;i<(int32_t)subfiles.size();i+=2) {

	688 try {

	689 handleFile(subfiles[i], subfiles[i+1]);

	690 } catch (const exception& e) {

	691 cerr << "Error: While processing \"" << subfiles[i] << "\", "

	692 << e.what() << endl;

	693 exit(1);

	694 }

	695 }

	696 for(int32_t i=0;i<(int32_t)subdirs.size();i+=2) {

	697 scandir(subdirs[i], subdirs[i+1]);

	698 }

	699 }

	700

	701 #endif

	702

	703 //--------------------------------------------------------------------

	704 // Final zone and rule info

	705 //--------------------------------------------------------------------

	706

	707 /**

	708 * Read and discard the current line.

	709 */

	710 void consumeLine(istream& in) {

	711 int32_t c;

	712 do {

	713 c = in.get();

	714 } while (c != EOF && c != '\n');

	715 }

	716

	717 enum {

	718 DOM = 0,

	719 DOWGEQ = 1,

	720 DOWLEQ = 2

	721 };

	722

	723 const char* TIME_MODE[] = {"w", "s", "u"};

	724

	725 // Allow 29 days in February because zic outputs February 29

	726 // for rules like "last Sunday in February".

	727 const int32_t MONTH_LEN[] = {31,29,31,30,31,30,31,31,30,31,30,31};

	728

	729 const int32_t HOUR = 3600;

	730

	731 struct FinalZone {

	732 int32_t offset; // raw offset

	733 int32_t year; // takes effect for y >= year

	734 string ruleid;

	735 set<string> aliases;

	736 FinalZone(int32_t _offset, int32_t _year, const string& _ruleid) :

	737 offset(_offset), year(_year), ruleid(_ruleid) {

	738 if (offset <= -16HOUR \|\| offset >= 16HOUR) {

	739 ostringstream os;

	740 os << "Invalid input offset " << offset

	741 << " for year " << year

	742 << " and rule ID " << ruleid;

	743 throw invalid_argument(os.str());

	744 }

	745 if (year < 1900 \|\| year >= 2050) {

	746 ostringstream os;

	747 os << "Invalid input year " << year

	748 << " with offset " << offset

	749 << " and rule ID " << ruleid;

	750 throw invalid_argument(os.str());

	751 }

	752 }

	753 FinalZone() : offset(-1), year(-1) {}

	754 void addLink(const string& alias) {

	755 if (aliases.find(alias) != aliases.end()) {

	756 ostringstream os;

	757 os << "Duplicate alias " << alias;

	758 throw invalid_argument(os.str());

	759 }

	760 aliases.insert(alias);

	761 }

	762 };

	763

	764 struct FinalRulePart {

	765 int32_t mode;

	766 int32_t month;

	767 int32_t dom;

	768 int32_t dow;

	769 int32_t time;

	770 int32_t offset; // dst offset, usually either 0 or 1:00

	771

	772 // Isstd and isgmt only have 3 valid states, corresponding to local

	773 // wall time, local standard time, and GMT standard time.

	774 // Here is how the isstd & isgmt flags are set by zic:

	775 //\| case 's': /* Standard */

	776 //\| rp->r_todisstd = TRUE;

	777 //\| rp->r_todisgmt = FALSE;

	778 //\| case 'w': /* Wall */

	779 //\| rp->r_todisstd = FALSE;

	780 //\| rp->r_todisgmt = FALSE;

	781 //\| case 'g': /* Greenwich */

	782 //\| case 'u': /* Universal */

	783 //\| case 'z': /* Zulu */

	784 //\| rp->r_todisstd = TRUE;

	785 //\| rp->r_todisgmt = TRUE;

	786 bool isstd;

	787 bool isgmt;

	788

	789 bool isset; // used during building; later ignored

	790

	791 FinalRulePart() : isset(false) {}

	792 void set(const string& id,

	793 const string& _mode,

	794 int32_t _month,

	795 int32_t _dom,

	796 int32_t _dow,

	797 int32_t _time,

	798 bool _isstd,

	799 bool _isgmt,

	800 int32_t _offset) {

	801 if (isset) {

	802 throw invalid_argument("FinalRulePart set twice");

	803 }

	804 isset = true;

	805 if (_mode == "DOWLEQ") {

	806 mode = DOWLEQ;

	807 } else if (_mode == "DOWGEQ") {

	808 mode = DOWGEQ;

	809 } else if (_mode == "DOM") {

	810 mode = DOM;

	811 } else {

	812 throw invalid_argument("Unrecognized FinalRulePart mode");

	813 }

	814 month = _month;

	815 dom = _dom;

	816 dow = _dow;

	817 time = _time;

	818 isstd = _isstd;

	819 isgmt = _isgmt;

	820 offset = _offset;

	821

	822 ostringstream os;

	823 if (month < 0 \|\| month >= 12) {

	824 os << "Invalid input month " << month;

	825 }

	826 if (dom < 1 \|\| dom > MONTH_LEN[month]) {

	827 os << "Invalid input day of month " << dom;

	828 }

	829 if (mode != DOM && (dow < 0 \|\| dow >= 7)) {

	830 os << "Invalid input day of week " << dow;

	831 }

	832 if (offset < 0 \|\| offset > HOUR) {

	833 os << "Invalid input offset " << offset;

	834 }

	835 if (isgmt && !isstd) {

	836 os << "Invalid input isgmt && !isstd";

	837 }

	838 if (!os.str().empty()) {

	839 os << " for rule "

	840 << id

	841 << _mode

	842 << month << dom << dow << time

	843 << isstd << isgmt

	844 << offset;

	845 throw invalid_argument(os.str());

	846 }

	847 }

	848

	849 /**

	850 * Return the time mode as an ICU SimpleTimeZone int from 0..2;

	851 * see simpletz.h.

	852 */

	853 int32_t timemode() const {

	854 if (isgmt) {

	855 assert(isstd);

	856 return 2; // gmt standard

	857 }

	858 if (isstd) {

	859 return 1; // local standard

	860 }

	861 return 0; // local wall

	862 }

	863

	864 // The SimpleTimeZone encoding method for rules is as follows:

	865 // stz_dowim stz_dow

	866 // DOM: dom 0

	867 // DOWGEQ: dom -(dow+1)

	868 // DOWLEQ: -dom -(dow+1)

	869 // E.g., to encode Mon>=7, use stz_dowim=7, stz_dow=-2

	870 // to encode Mon<=7, use stz_dowim=-7, stz_dow=-2

	871 // to encode 7, use stz_dowim=7, stz_dow=0

	872 // Note that for this program and for SimpleTimeZone, 0==Jan,

	873 // but for this program 0==Sun while for SimpleTimeZone 1==Sun.

	874

	875 /**

	876 * Return a "dowim" param suitable for SimpleTimeZone.

	877 */

	878 int32_t stz_dowim() const {

	879 return (mode == DOWLEQ) ? -dom : dom;

	880 }

	881

	882 /**

	883 * Return a "dow" param suitable for SimpleTimeZone.

	884 */

	885 int32_t stz_dow() const {

	886 return (mode == DOM) ? 0 : -(dow+1);

	887 }

	888 };

	889

	890 struct FinalRule {

	891 FinalRulePart part[2];

	892

	893 bool isset() const {

	894 return part[0].isset && part[1].isset;

	895 }

	896

	897 void print(ostream& os) const;

	898 };

	899

	900 map<string,FinalZone> finalZones;

	901 map<string,FinalRule> finalRules;

	902

	903 map<string, set<string> > links;

	904 map<string, string> reverseLinks;

	905 map<string, string> linkSource; // id => "Olson link" or "ICU alias"

	906

	907 /**

	908 * Predicate used to find FinalRule objects that do not have both

	909 * sub-parts set (indicating an error in the input file).

	910 */

	911 bool isNotSet(const pair<const string,FinalRule>& p) {

	912 return !p.second.isset();

	913 }

	914

	915 /**

	916 * Predicate used to find FinalZone objects that do not map to a known

	917 * rule (indicating an error in the input file).

	918 */

	919 bool mapsToUnknownRule(const pair<const string,FinalZone>& p) {

	920 return finalRules.find(p.second.ruleid) == finalRules.end();

	921 }

	922

	923 /**

	924 * This set is used to make sure each rule in finalRules is used at

	925 * least once. First we populate it with all the rules from

	926 * finalRules; then we remove all the rules referred to in

	927 * finaleZones.

	928 */

	929 set<string> ruleIDset;

	930

	931 void insertRuleID(const pair<string,FinalRule>& p) {

	932 ruleIDset.insert(p.first);

	933 }

	934

	935 void eraseRuleID(const pair<string,FinalZone>& p) {

	936 ruleIDset.erase(p.second.ruleid);

	937 }

	938

	939 /**

	940 * Populate finalZones and finalRules from the given istream.

	941 */

	942 void readFinalZonesAndRules(istream& in) {

	943

	944 for (;;) {

	945 string token;

	946 in >> token;

	947 if (in.eof() \|\| !in) {

	948 break;

	949 } else if (token == "zone") {

	950 // zone Africa/Cairo 7200 1995 Egypt # zone Africa/Cairo, offset 720 0, year >= 1995, rule Egypt (0)

	951 string id, ruleid;

	952 int32_t offset, year;

	953 in >> id >> offset >> year >> ruleid;

	954 consumeLine(in);

	955 finalZones[id] = FinalZone(offset, year, ruleid);

	956 } else if (token == "rule") {

	957 // rule US DOWGEQ 3 1 0 7200 0 0 3600 # 52: US, file data/northameri ca, line 119, mode DOWGEQ, April, dom 1, Sunday, time 7200, isstd 0, isgmt 0, of fset 3600

	958 // rule US DOWLEQ 9 31 0 7200 0 0 0 # 53: US, file data/northamerica , line 114, mode DOWLEQ, October, dom 31, Sunday, time 7200, isstd 0, isgmt 0, o ffset 0

	959 string id, mode;

	960 int32_t month, dom, dow, time, offset;

	961 bool isstd, isgmt;

	962 in >> id >> mode >> month >> dom >> dow >> time >> isstd >> isgmt >> offset;

	963 consumeLine(in);

	964 FinalRule& fr = finalRules[id];

	965 int32_t p = fr.part[0].isset ? 1 : 0;

	966 fr.part[p].set(id, mode, month, dom, dow, time, isstd, isgmt, offset );

	967 } else if (token == "link") {

	968 string fromid, toid; // fromid == "real" zone, toid == alias

	969 in >> fromid >> toid;

	970 // DO NOT consumeLine(in);

	971 if (finalZones.find(toid) != finalZones.end()) {

	972 throw invalid_argument("Bad link: `to' id is a \"real\" zone");

	973 }

	974

	975 links[fromid].insert(toid);

	976 reverseLinks[toid] = fromid;

	977

	978 linkSource[fromid] = "Olson link";

	979 linkSource[toid] = "Olson link";

	980 } else if (token.length() > 0 && token[0] == '#') {

	981 consumeLine(in);

	982 } else {

	983 throw invalid_argument("Unrecognized keyword");

	984 }

	985 }

	986

	987 if (!in.eof() && !in) {

	988 throw invalid_argument("Parse failure");

	989 }

	990

	991 // Perform validity check: Each rule should have data for 2 parts.

	992 if (count_if(finalRules.begin(), finalRules.end(), isNotSet) != 0) {

	993 throw invalid_argument("One or more incomplete rule pairs");

	994 }

	995

	996 // Perform validity check: Each zone should map to a known rule.

	997 if (count_if(finalZones.begin(), finalZones.end(), mapsToUnknownRule) != 0) {

	998 throw invalid_argument("One or more zones refers to an unknown rule");

	999 }

	1000

	1001 // Perform validity check: Each rule should be referred to by a zone.

	1002 ruleIDset.clear();

	1003 for_each(finalRules.begin(), finalRules.end(), insertRuleID);

	1004 for_each(finalZones.begin(), finalZones.end(), eraseRuleID);

	1005 if (ruleIDset.size() != 0) {

	1006 throw invalid_argument("Unused rules");

	1007 }

	1008 }

	1009

	1010 //--------------------------------------------------------------------

	1011 // Resource bundle output

	1012 //--------------------------------------------------------------------

	1013

	1014 // SEE olsontz.h FOR RESOURCE BUNDLE DATA LAYOUT

	1015

	1016 void ZoneInfo::print(ostream& os, const string& id) const {

	1017 // Implement compressed format #2:

	1018 os << " /* " << id << " */ ";

	1019

	1020 if (aliasTo >= 0) {

	1021 assert(aliases.size() == 0);

	1022 os << ":int { " << aliasTo << " } "; // No endl - save room for comment.

	1023 return;

	1024 }

	1025

	1026 if (ICU44PLUS) {

	1027 os << ":table {" << endl;

	1028 } else {

	1029 os << ":array {" << endl;

	1030 }

	1031

	1032 vector<Transition>::const_iterator trn;

	1033 vector<ZoneType>::const_iterator typ;

	1034

	1035 bool first;

	1036

	1037 if (ICU44PLUS) {

	1038 trn = transitions.begin();

	1039

	1040 // pre 32bit transitions

	1041 if (trn != transitions.end() && trn->time < LOWEST_TIME32) {

	1042 os << " transPre32:intvector { ";

	1043 for (first = true; trn != transitions.end() && trn->time < LOWEST_TI ME32; ++trn) {

	1044 if (!first) {

	1045 os<< ", ";

	1046 }

	1047 first = false;

	1048 os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff);

	1049 }

	1050 os << " }" << endl;

	1051 }

	1052

	1053 // 32bit transtions

	1054 if (trn != transitions.end() && trn->time < HIGHEST_TIME32) {

	1055 os << " trans:intvector { ";

	1056 for (first = true; trn != transitions.end() && trn->time < HIGHEST_T IME32; ++trn) {

	1057 if (!first) {

	1058 os << ", ";

	1059 }

	1060 first = false;

	1061 os << trn->time;

	1062 }

	1063 os << " }" << endl;

	1064 }

	1065

	1066 // post 32bit transitons

	1067 if (trn != transitions.end()) {

	1068 os << " transPost32:intvector { ";

	1069 for (first = true; trn != transitions.end(); ++trn) {

	1070 if (!first) {

	1071 os<< ", ";

	1072 }

	1073 first = false;

	1074 os << (int32_t)(trn->time >> 32) << ", " << (int32_t)(trn->time & 0x00000000ffffffff);

	1075 }

	1076 os << " }" << endl;

	1077 }

	1078 } else {

	1079 os << " :intvector { ";

	1080 for (trn = transitions.begin(), first = true; trn != transitions.end(); ++trn) {

	1081 if (!first) os << ", ";

	1082 first = false;

	1083 os << trn->time;

	1084 }

	1085 os << " }" << endl;

	1086 }

	1087

	1088

	1089 first=true;

	1090 if (ICU44PLUS) {

	1091 os << " typeOffsets:intvector { ";

	1092 } else {

	1093 os << " :intvector { ";

	1094 }

	1095 for (typ = types.begin(); typ != types.end(); ++typ) {

	1096 if (!first) os << ", ";

	1097 first = false;

	1098 os << typ->rawoffset << ", " << typ->dstoffset;

	1099 }

	1100 os << " }" << endl;

	1101

	1102 if (ICU44PLUS) {

	1103 if (transitions.size() != 0) {

	1104 os << " typeMap:bin { \"" << hex << setfill('0');

	1105 for (trn = transitions.begin(); trn != transitions.end(); ++trn) {

	1106 os << setw(2) << trn->type;

	1107 }

	1108 os << dec << "\" }" << endl;

	1109 }

	1110 } else {

	1111 os << " :bin { \"" << hex << setfill('0');

	1112 for (trn = transitions.begin(); trn != transitions.end(); ++trn) {

	1113 os << setw(2) << trn->type;

	1114 }

	1115 os << dec << "\" }" << endl;

	1116 }

	1117

	1118 // Final zone info, if any

	1119 if (finalYear != -1) {

	1120 if (ICU44PLUS) {

	1121 os << " finalRule { \"" << finalRuleID << "\" }" << endl;

	1122 os << " finalRaw:int { " << finalOffset << " }" << endl;

	1123 os << " finalYear:int { " << finalYear << " }" << endl;

	1124 } else {

	1125 os << " \"" << finalRuleID << "\"" << endl;

	1126 os << " :intvector { " << finalOffset << ", "

	1127 << finalYear << " }" << endl;

	1128 }

	1129 }

	1130

	1131 // Alias list, if any

	1132 if (aliases.size() != 0) {

	1133 first = true;

	1134 if (ICU44PLUS) {

	1135 os << " links:intvector { ";

	1136 } else {

	1137 os << " :intvector { ";

	1138 }

	1139 for (set<int32_t>::const_iterator i=aliases.begin(); i!=aliases.end(); + +i) {

	1140 if (!first) os << ", ";

	1141 first = false;

	1142 os << *i;

	1143 }

	1144 os << " }" << endl;

	1145 }

	1146

	1147 os << " } "; // no trailing 'endl', so comments can be placed.

	1148 }

	1149

	1150 inline ostream&

	1151 operator<<(ostream& os, const ZoneMap& zoneinfo) {

	1152 int32_t c = 0;

	1153 for (ZoneMapIter it = zoneinfo.begin();

	1154 it != zoneinfo.end();

	1155 ++it) {

	1156 if(c && !ICU44PLUS) os << ",";

	1157 it->second.print(os, it->first);

	1158 os << "//Z#" << c++ << endl;

	1159 }

	1160 return os;

	1161 }

	1162

	1163 // print the string list

	1164 ostream& printStringList( ostream& os, const ZoneMap& zoneinfo) {

	1165 int32_t n = 0; // count

	1166 int32_t col = 0; // column

	1167 os << " Names {" << endl

	1168 << " ";

	1169 for (ZoneMapIter it = zoneinfo.begin();

	1170 it != zoneinfo.end();

	1171 ++it) {

	1172 if(n) {

	1173 os << ",";

	1174 col ++;

	1175 }

	1176 const string& id = it->first;

	1177 os << "\"" << id << "\"";

	1178 col += id.length() + 2;

	1179 if(col >= 50) {

	1180 os << " // " << n << endl

	1181 << " ";

	1182 col = 0;

	1183 }

	1184 n++;

	1185 }

	1186 os << " // " << (n-1) << endl

	1187 << " }" << endl;

	1188

	1189 return os;

	1190 }

	1191

	1192 //--------------------------------------------------------------------

	1193 // main

	1194 //--------------------------------------------------------------------

	1195

	1196 // Unary predicate for finding transitions after a given time

	1197 bool isAfter(const Transition t, int64_t thresh) {

	1198 return t.time >= thresh;

	1199 }

	1200

	1201 /**

	1202 * A zone type that contains only the raw and dst offset. Used by the

	1203 * optimizeTypeList() method.

	1204 */

	1205 struct SimplifiedZoneType {

	1206 int64_t rawoffset;

	1207 int64_t dstoffset;

	1208 SimplifiedZoneType() : rawoffset(-1), dstoffset(-1) {}

	1209 SimplifiedZoneType(const ZoneType& t) : rawoffset(t.rawoffset),

	1210 dstoffset(t.dstoffset) {}

	1211 bool operator<(const SimplifiedZoneType& t) const {

	1212 return rawoffset < t.rawoffset \|\|

	1213 (rawoffset == t.rawoffset &&

	1214 dstoffset < t.dstoffset);

	1215 }

	1216 };

	1217

	1218 /**

	1219 * Construct a ZoneType from a SimplifiedZoneType. Note that this

	1220 * discards information; the new ZoneType will have meaningless

	1221 * (empty) abbr, isdst, isstd, and isgmt flags; this is appropriate,

	1222 * since ignoring these is how we do optimization (we have no use for

	1223 * these in historical transitions).

	1224 */

	1225 ZoneType::ZoneType(const SimplifiedZoneType& t) :

	1226 rawoffset(t.rawoffset), dstoffset(t.dstoffset),

	1227 abbr(-1), isdst(false), isstd(false), isgmt(false) {}

	1228

	1229 /**

	1230 * Optimize the type list to remove excess entries. The type list may

	1231 * contain entries that are distinct only in terms of their dst, std,

	1232 * or gmt flags. Since we don't care about those flags, we can reduce

	1233 * the type list to a set of unique raw/dst offset pairs, and remap

	1234 * the type indices in the transition list, which stores, for each

	1235 * transition, a transition time and a type index.

	1236 */

	1237 void ZoneInfo::optimizeTypeList() {

	1238 // Assemble set of unique types; only those in the `transitions'

	1239 // list, since there may be unused types in the `types' list

	1240 // corresponding to transitions that have been trimmed (during

	1241 // merging of final data).

	1242

	1243 if (aliasTo >= 0) return; // Nothing to do for aliases

	1244

	1245 if (!ICU44PLUS) {

	1246 // This is the old logic which has a bug, which occasionally removes

	1247 // the type before the first transition. The problem was fixed

	1248 // by inserting the dummy transition indirectly.

	1249

	1250 // If there are zero transitions and one type, then leave that as-is.

	1251 if (transitions.size() == 0) {

	1252 if (types.size() != 1) {

	1253 cerr << "Error: transition count = 0, type count = " << types.si ze() << endl;

	1254 }

	1255 return;

	1256 }

	1257

	1258 set<SimplifiedZoneType> simpleset;

	1259 for (vector<Transition>::const_iterator i=transitions.begin();

	1260 i!=transitions.end(); ++i) {

	1261 assert(i->type < (int32_t)types.size());

	1262 simpleset.insert(types[i->type]);

	1263 }

	1264

	1265 // Map types to integer indices

	1266 map<SimplifiedZoneType,int32_t> simplemap;

	1267 int32_t n=0;

	1268 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin();

	1269 i!=simpleset.end(); ++i) {

	1270 simplemap[*i] = n++;

	1271 }

	1272

	1273 // Remap transitions

	1274 for (vector<Transition>::iterator i=transitions.begin();

	1275 i!=transitions.end(); ++i) {

	1276 assert(i->type < (int32_t)types.size());

	1277 ZoneType oldtype = types[i->type];

	1278 SimplifiedZoneType newtype(oldtype);

	1279 assert(simplemap.find(newtype) != simplemap.end());

	1280 i->type = simplemap[newtype];

	1281 }

	1282

	1283 // Replace type list

	1284 types.clear();

	1285 copy(simpleset.begin(), simpleset.end(), back_inserter(types));

	1286

	1287 } else {

	1288 if (types.size() > 1) {

	1289 // Note: localtime uses the very first non-dst type as initial offse ts.

	1290 // If all types are DSTs, the very first type is treated as the init ial offsets.

	1291

	1292 // Decide a type used as the initial offsets. ICU put the type at i ndex 0.

	1293 ZoneType initialType = types[0];

	1294 for (vector<ZoneType>::const_iterator i=types.begin(); i!=types.end( ); ++i) {

	1295 if (i->dstoffset == 0) {

	1296 initialType = *i;

	1297 break;

	1298 }

	1299 }

	1300

	1301 SimplifiedZoneType initialSimplifiedType(initialType);

	1302

	1303 // create a set of unique types, but ignoring fields which we're not interested in

	1304 set<SimplifiedZoneType> simpleset;

	1305 simpleset.insert(initialSimplifiedType);

	1306 for (vector<Transition>::const_iterator i=transitions.begin(); i!=tr ansitions.end(); ++i) {

	1307 assert(i->type < (int32_t)types.size());

	1308 simpleset.insert(types[i->type]);

	1309 }

	1310

	1311 // Map types to integer indices, however, keeping the first type at offset 0

	1312 map<SimplifiedZoneType,int32_t> simplemap;

	1313 simplemap[initialSimplifiedType] = 0;

	1314 int32_t n = 1;

	1315 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i! =simpleset.end(); ++i) {

	1316 if (i < initialSimplifiedType \|\| initialSimplifiedType < i) {

	1317 simplemap[*i] = n++;

	1318 }

	1319 }

	1320

	1321 // Remap transitions

	1322 for (vector<Transition>::iterator i=transitions.begin();

	1323 i!=transitions.end(); ++i) {

	1324 assert(i->type < (int32_t)types.size());

	1325 ZoneType oldtype = types[i->type];

	1326 SimplifiedZoneType newtype(oldtype);

	1327 assert(simplemap.find(newtype) != simplemap.end());

	1328 i->type = simplemap[newtype];

	1329 }

	1330

	1331 // Replace type list

	1332 types.clear();

	1333 types.push_back(initialSimplifiedType);

	1334 for (set<SimplifiedZoneType>::const_iterator i=simpleset.begin(); i! =simpleset.end(); ++i) {

	1335 if (i < initialSimplifiedType \|\| initialSimplifiedType < i) {

	1336 types.push_back(*i);

	1337 }

	1338 }

	1339

	1340 // Reiterating transitions to remove any transitions which

	1341 // do not actually change the raw/dst offsets

	1342 int32_t prevTypeIdx = 0;

	1343 for (vector<Transition>::iterator i=transitions.begin(); i!=transiti ons.end();) {

	1344 if (i->type == prevTypeIdx) {

	1345 // this is not a time transition, probably just name change

	1346 // e.g. America/Resolute after 2006 in 2010b

	1347 transitions.erase(i);

	1348 } else {

	1349 prevTypeIdx = i->type;

	1350 i++;

	1351 }

	1352 }

	1353 }

	1354 }

	1355

	1356 }

	1357

	1358 /**

	1359 * Merge final zone data into this zone.

	1360 */

	1361 void ZoneInfo::mergeFinalData(const FinalZone& fz) {

	1362 int32_t year = fz.year;

	1363 int64_t seconds = yearToSeconds(year);

	1364

	1365 if (!ICU44PLUS) {

	1366 if (seconds > HIGHEST_TIME32) {

	1367 // Avoid transitions beyond signed 32bit max second.

	1368 // This may result incorrect offset computation around

	1369 // HIGHEST_TIME32. This is a limitation of ICU

	1370 // before 4.4.

	1371 seconds = HIGHEST_TIME32;

	1372 }

	1373 }

	1374

	1375 vector<Transition>::iterator it =

	1376 find_if(transitions.begin(), transitions.end(),

	1377 bind2nd(ptr_fun(isAfter), seconds));

	1378 transitions.erase(it, transitions.end());

	1379

	1380 if (finalYear != -1) {

	1381 throw invalid_argument("Final zone already merged in");

	1382 }

	1383 finalYear = fz.year;

	1384 finalOffset = fz.offset;

	1385 finalRuleID = fz.ruleid;

	1386 }

	1387

	1388 /**

	1389 * Merge the data from the given final zone into the core zone data by

	1390 * calling the ZoneInfo member function mergeFinalData.

	1391 */

	1392 void mergeOne(const string& zoneid, const FinalZone& fz) {

	1393 if (ZONEINFO.find(zoneid) == ZONEINFO.end()) {

	1394 throw invalid_argument("Unrecognized final zone ID");

	1395 }

	1396 ZONEINFO[zoneid].mergeFinalData(fz);

	1397 }

	1398

	1399 /**

	1400 * Visitor function that merges the final zone data into the main zone

	1401 * data structures. It calls mergeOne for each final zone and its

	1402 * list of aliases.

	1403 */

	1404 void mergeFinalZone(const pair<string,FinalZone>& p) {

	1405 const string& id = p.first;

	1406 const FinalZone& fz = p.second;

	1407

	1408 mergeOne(id, fz);

	1409 }

	1410

	1411 /**

	1412 * Print this rule in resource bundle format to os. ID and enclosing

	1413 * braces handled elsewhere.

	1414 */

	1415 void FinalRule::print(ostream& os) const {

	1416 // First print the rule part that enters DST; then the rule part

	1417 // that exits it.

	1418 int32_t whichpart = (part[0].offset != 0) ? 0 : 1;

	1419 assert(part[whichpart].offset != 0);

	1420 assert(part[1-whichpart].offset == 0);

	1421

	1422 os << " ";

	1423 for (int32_t i=0; i<2; ++i) {

	1424 const FinalRulePart& p = part[whichpart];

	1425 whichpart = 1-whichpart;

	1426 os << p.month << ", " << p.stz_dowim() << ", " << p.stz_dow() << ", "

	1427 << p.time << ", " << p.timemode() << ", ";

	1428 }

	1429 os << part[whichpart].offset << endl;

	1430 }

	1431

	1432 int main(int argc, char *argv[]) {

	1433 string rootpath, zonetab, version;

	1434 bool validArgs = FALSE;

	1435

	1436 if (argc == 4 \|\| argc == 5) {

	1437 validArgs = TRUE;

	1438 rootpath = argv[1];

	1439 zonetab = argv[2];

	1440 version = argv[3];

	1441 if (argc == 5) {

	1442 if (strcmp(argv[4], "--old") == 0) {

	1443 ICU44PLUS = FALSE;

	1444 TZ_RESOURCE_NAME = ICU_TZ_RESOURCE_OLD;

	1445 } else {

	1446 validArgs = FALSE;

	1447 }

	1448 }

	1449 }

	1450 if (!validArgs) {

	1451 cout << "Usage: tz2icu <dir> <cmap> <tzver> [--old]" << endl

	1452 << " <dir> path to zoneinfo file tree generated by" << endl

	1453 << " ICU-patched version of zic" << endl

	1454 << " <cmap> country map, from tzdata archive," << endl

	1455 << " typically named \"zone.tab\"" << endl

	1456 << " <tzver> version string, such as \"2003e\"" << endl

	1457 << " --old generating resource format before ICU4.4" << endl;

	1458 exit(1);

	1459 }

	1460

	1461 cout << "Olson data version: " << version << endl;

	1462 cout << "ICU 4.4+ format: " << (ICU44PLUS ? "Yes" : "No") << endl;

	1463

	1464 try {

	1465 ifstream finals(ICU_ZONE_FILE);

	1466 if (finals) {

	1467 readFinalZonesAndRules(finals);

	1468

	1469 cout << "Finished reading " << finalZones.size()

	1470 << " final zones and " << finalRules.size()

	1471 << " final rules from " ICU_ZONE_FILE << endl;

	1472 } else {

	1473 cerr << "Error: Unable to open " ICU_ZONE_FILE << endl;

	1474 return 1;

	1475 }

	1476 } catch (const exception& error) {

	1477 cerr << "Error: While reading " ICU_ZONE_FILE ": " << error.what() << en dl;

	1478 return 1;

	1479 }

	1480

	1481 try {

	1482 // Recursively scan all files below the given path, accumulating

	1483 // their data into ZONEINFO. All files must be TZif files. Any

	1484 // failure along the way will result in a call to exit(1).

	1485 scandir(rootpath);

	1486 } catch (const exception& error) {

	1487 cerr << "Error: While scanning " << rootpath << ": " << error.what() << endl;

	1488 return 1;

	1489 }

	1490

	1491 cout << "Finished reading " << ZONEINFO.size() << " zoneinfo files ["

	1492 << (ZONEINFO.begin())->first << ".."

	1493 << (--ZONEINFO.end())->first << "]" << endl;

	1494

	1495 try {

	1496 for_each(finalZones.begin(), finalZones.end(), mergeFinalZone);

	1497 } catch (const exception& error) {

	1498 cerr << "Error: While merging final zone data: " << error.what() << endl ;

	1499 return 1;

	1500 }

	1501

	1502 // Process links (including ICU aliases). For each link set we have

	1503 // a canonical ID (e.g., America/Los_Angeles) and a set of one or more

	1504 // aliases (e.g., PST, PST8PDT, ...).

	1505

	1506 // 1. Add all aliases as zone objects in ZONEINFO

	1507 for (map<string,set<string> >::const_iterator i = links.begin();

	1508 i!=links.end(); ++i) {

	1509 const string& olson = i->first;

	1510 const set<string>& aliases = i->second;

	1511 if (ZONEINFO.find(olson) == ZONEINFO.end()) {

	1512 cerr << "Error: Invalid " << linkSource[olson] << " to non-existent \""

	1513 << olson << "\"" << endl;

	1514 return 1;

	1515 }

	1516 for (set<string>::const_iterator j=aliases.begin();

	1517 j!=aliases.end(); ++j) {

	1518 ZONEINFO[*j] = ZoneInfo();

	1519 }

	1520 }

	1521

	1522 // 2. Create a mapping from zones to index numbers 0..n-1.

	1523 map<string,int32_t> zoneIDs;

	1524 vector<string> zoneIDlist;

	1525 int32_t z=0;

	1526 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {

	1527 zoneIDs[i->first] = z++;

	1528 zoneIDlist.push_back(i->first);

	1529 }

	1530 assert(z == (int32_t) ZONEINFO.size());

	1531

	1532 // 3. Merge aliases. Sometimes aliases link to other aliases; we

	1533 // resolve these into simplest possible sets.

	1534 map<string,set<string> > links2;

	1535 map<string,string> reverse2;

	1536 for (map<string,set<string> >::const_iterator i = links.begin();

	1537 i!=links.end(); ++i) {

	1538 string olson = i->first;

	1539 while (reverseLinks.find(olson) != reverseLinks.end()) {

	1540 olson = reverseLinks[olson];

	1541 }

	1542 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.end() ; ++j) {

	1543 links2[olson].insert(*j);

	1544 reverse2[*j] = olson;

	1545 }

	1546 }

	1547 links = links2;

	1548 reverseLinks = reverse2;

	1549

	1550 if (false) { // Debugging: Emit link map

	1551 for (map<string,set<string> >::const_iterator i = links.begin();

	1552 i!=links.end(); ++i) {

	1553 cout << i->first << ": ";

	1554 for (set<string>::const_iterator j=i->second.begin(); j!=i->second.e nd(); ++j) {

	1555 cout << *j << ", ";

	1556 }

	1557 cout << endl;

	1558 }

	1559 }

	1560

	1561 // 4. Update aliases

	1562 for (map<string,set<string> >::const_iterator i = links.begin();

	1563 i!=links.end(); ++i) {

	1564 const string& olson = i->first;

	1565 const set<string>& aliases = i->second;

	1566 ZONEINFO[olson].clearAliases();

	1567 ZONEINFO[olson].addAlias(zoneIDs[olson]);

	1568 for (set<string>::const_iterator j=aliases.begin();

	1569 j!=aliases.end(); ++j) {

	1570 assert(zoneIDs.find(olson) != zoneIDs.end());

	1571 assert(zoneIDs.find(*j) != zoneIDs.end());

	1572 assert(ZONEINFO.find(*j) != ZONEINFO.end());

	1573 ZONEINFO[*j].setAliasTo(zoneIDs[olson]);

	1574 ZONEINFO[olson].addAlias(zoneIDs[*j]);

	1575 }

	1576 }

	1577

	1578 // Once merging of final data is complete, we can optimize the type list

	1579 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {

	1580 i->second.optimizeTypeList();

	1581 }

	1582

	1583 // Create the country map

	1584 map<string, set<string> > countryMap; // country -> set of zones

	1585 map<string, string> reverseCountryMap; // zone -> country

	1586 try {

	1587 ifstream f(zonetab.c_str());

	1588 if (!f) {

	1589 cerr << "Error: Unable to open " << zonetab << endl;

	1590 return 1;

	1591 }

	1592 int32_t n = 0;

	1593 string line;

	1594 while (getline(f, line)) {

	1595 string::size_type lb = line.find('#');

	1596 if (lb != string::npos) {

	1597 line.resize(lb); // trim comments

	1598 }

	1599 string country, coord, zone;

	1600 istringstream is(line);

	1601 is >> country >> coord >> zone;

	1602 if (country.size() == 0) continue;

	1603 if (country.size() != 2 \|\| zone.size() < 1) {

	1604 cerr << "Error: Can't parse " << line << " in " << zonetab << en dl;

	1605 return 1;

	1606 }

	1607 if (ZONEINFO.find(zone) == ZONEINFO.end()) {

	1608 cerr << "Error: Country maps to invalid zone " << zone

	1609 << " in " << zonetab << endl;

	1610 return 1;

	1611 }

	1612 countryMap[country].insert(zone);

	1613 reverseCountryMap[zone] = country;

	1614 //cerr << (n+1) << ": " << country << " <=> " << zone << endl;

	1615 ++n;

	1616 }

	1617 cout << "Finished reading " << n

	1618 << " country entries from " << zonetab << endl;

	1619 } catch (const exception& error) {

	1620 cerr << "Error: While reading " << zonetab << ": " << error.what() << en dl;

	1621 return 1;

	1622 }

	1623

	1624 // Merge ICU aliases into country map. Don't merge any alias

	1625 // that already has a country map, since that doesn't make sense.

	1626 // E.g. "Link Europe/Oslo Arctic/Longyearbyen" doesn't mean we

	1627 // should cross-map the countries between these two zones.

	1628 for (map<string,set<string> >::const_iterator i = links.begin();

	1629 i!=links.end(); ++i) {

	1630 const string& olson(i->first);

	1631 if (reverseCountryMap.find(olson) == reverseCountryMap.end()) {

	1632 continue;

	1633 }

	1634 string c = reverseCountryMap[olson];

	1635 const set<string>& aliases(i->second);

	1636 for (set<string>::const_iterator j=aliases.begin();

	1637 j != aliases.end(); ++j) {

	1638 if (reverseCountryMap.find(*j) == reverseCountryMap.end()) {

	1639 countryMap[c].insert(*j);

	1640 reverseCountryMap[*j] = c;

	1641 //cerr << "Aliased country: " << c << " <=> " << *j << endl;

	1642 }

	1643 }

	1644 }

	1645

	1646 // Create a pseudo-country containing all zones belonging to no country

	1647 set<string> nocountry;

	1648 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {

	1649 if (reverseCountryMap.find(i->first) == reverseCountryMap.end()) {

	1650 nocountry.insert(i->first);

	1651 }

	1652 }

	1653 countryMap[""] = nocountry;

	1654

	1655 // Get local time & year for below

	1656 time_t sec;

	1657 time(&sec);

	1658 struct tm* now = localtime(&sec);

	1659 int32_t thisYear = now->tm_year + 1900;

	1660

	1661 string filename = TZ_RESOURCE_NAME + ".txt";

	1662 // Write out a resource-bundle source file containing data for

	1663 // all zones.

	1664 ofstream file(filename.c_str());

	1665 if (file) {

	1666 file << "//---------------------------------------------------------" << endl

	1667 << "// Copyright (C) 2003";

	1668 if (thisYear > 2003) {

	1669 file << "-" << thisYear;

	1670 }

	1671 file << ", International Business Machines" << endl

	1672 << "// Corporation and others. All Rights Reserved." << endl

	1673 << "//---------------------------------------------------------" << endl

	1674 << "// Build tool: tz2icu" << endl

	1675 << "// Build date: " << asctime(now) /* << endl -- asctime emits CR */

	1676 << "// Olson source: ftp://elsie.nci.nih.gov/pub/" << endl

	1677 << "// Olson version: " << version << endl

	1678 << "// ICU version: " << U_ICU_VERSION << endl

	1679 << "//---------------------------------------------------------" << endl

	1680 << "// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<" << endl

	1681 << "// >> !!! >>> DO NOT EDIT <<< !!! <<" << endl

	1682 << "//---------------------------------------------------------" << endl

	1683 << endl

	1684 << TZ_RESOURCE_NAME << ":table(nofallback) {" << endl

	1685 << " TZVersion { \"" << version << "\" }" << endl

	1686 << " Zones:array { " << endl

	1687 << ZONEINFO // Zones (the actual data)

	1688 << " }" << endl;

	1689

	1690 // Names correspond to the Zones list, used for binary searching.

	1691 printStringList ( file, ZONEINFO ); // print the Names list

	1692

	1693 // Final Rules are used if requested by the zone

	1694 file << " Rules { " << endl;

	1695 // Emit final rules

	1696 int32_t frc = 0;

	1697 for(map<string,FinalRule>::iterator i=finalRules.begin();

	1698 i!=finalRules.end(); ++i) {

	1699 const string& id = i->first;

	1700 const FinalRule& r = i->second;

	1701 file << " " << id << ":intvector {" << endl;

	1702 r.print(file);

	1703 file << " } //_#" << frc++ << endl;

	1704 }

	1705 file << " }" << endl;

	1706

	1707 // Emit country (region) map.

	1708 if (ICU44PLUS) {

	1709 file << " Regions:array {" << endl;

	1710 int32_t zn = 0;

	1711 for (ZoneMap::iterator i=ZONEINFO.begin(); i!=ZONEINFO.end(); ++i) {

	1712 map<string, string>::iterator cit = reverseCountryMap.find(i->fi rst);

	1713 if (cit == reverseCountryMap.end()) {

	1714 file << " \"001\",";

	1715 } else {

	1716 file << " \"" << cit->second << "\", ";

	1717 }

	1718 file << "//Z#" << zn++ << " " << i->first << endl;

	1719 }

	1720 file << " }" << endl;

	1721 } else {

	1722 file << " Regions { " << endl;

	1723 int32_t rc = 0;

	1724 for (map<string, set<string> >::const_iterator i=countryMap.begin();

	1725 i != countryMap.end(); ++i) {

	1726 string country = i->first;

	1727 const set<string>& zones(i->second);

	1728 file << " ";

	1729 if(country[0]==0) {

	1730 file << "Default";

	1731 }

	1732 file << country << ":intvector { ";

	1733 bool first = true;

	1734 for (set<string>::const_iterator j=zones.begin();

	1735 j != zones.end(); ++j) {

	1736 if (!first) file << ", ";

	1737 first = false;

	1738 if (zoneIDs.find(*j) == zoneIDs.end()) {

	1739 cerr << "Error: Nonexistent zone in country map: " << *j << endl;

	1740 return 1;

	1741 }

	1742 file << zoneIDs[*j]; // emit the zone's index number

	1743 }

	1744 file << " } //R#" << rc++ << endl;

	1745 }

	1746 file << " }" << endl;

	1747 }

	1748

	1749 file << "}" << endl;

	1750 }

	1751

	1752 file.close();

	1753

	1754 if (file) { // recheck error bit

	1755 cout << "Finished writing " << TZ_RESOURCE_NAME << ".txt" << endl;

	1756 } else {

	1757 cerr << "Error: Unable to open/write to " << TZ_RESOURCE_NAME << ".txt" << endl;

	1758 return 1;

	1759 }

	1760 }

	1761 //eof

OLD	NEW

« no previous file with comments | « icu46/source/tools/tzcode/tz2icu.h ('k') | icu46/source/tools/tzcode/tzfile.h » ('j') | no next file with comments »