icu46/source/test/cintltst/spooftest.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/test/cintltst/spooftest.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/test/cintltst/spooftest.c

===================================================================

--- icu46/source/test/cintltst/spooftest.c (revision 0)

+++ icu46/source/test/cintltst/spooftest.c (revision 0)

@@ -0,0 +1,507 @@

+/********************************************************************

+ * COPYRIGHT:

+ ********************************************************************/

+/********************************************************************************

+* File spooftest.c

+*********************************************************************************/

+/*C API TEST for the uspoof Unicode Identifier Spoofing and Security API */

+/**

+* This is an API test for ICU spoof detection in plain C. It doesn't test very many cases, and doesn't

+* try to test the full functionality. It just calls each function and verifies that it

+* works on a basic level.

+* More complete testing of spoof detection functionality is done with the C++ tests.

+**/

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION

+#include <stdlib.h>

+#include <stdio.h>

+#include <string.h>

+#include "unicode/uspoof.h"

+#include "unicode/ustring.h"

+#include "unicode/uset.h"

+#include "cintltst.h"

+#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \

+ log_err_status(status, "Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status));}}

+#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \

+log_err("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}

+#define TEST_ASSERT_EQ(a, b) { if ((a) != (b)) { \

+ log_err("Test Failure at file %s, line %d: \"%s\" (%d) != \"%s\" (%d) \n", \

+ __FILE__, __LINE__, #a, (a), #b, (b)); }}

+#define TEST_ASSERT_NE(a, b) { if ((a) == (b)) { \

+ log_err("Test Failure at file %s, line %d: \"%s\" (%d) == \"%s\" (%d) \n", \

+ __FILE__, __LINE__, #a, (a), #b, (b)); }}

+/*

+ * TEST_SETUP and TEST_TEARDOWN

+ * macros to handle the boilerplate around setting up a test case.

+ * Put arbitrary test code between SETUP and TEARDOWN.

+ * "sc" is the ready-to-go SpoofChecker for use in the tests.

+ */

+#define TEST_SETUP { \

+ UErrorCode status = U_ZERO_ERROR; \

+ USpoofChecker *sc; \

+ sc = uspoof_open(&status); \

+ TEST_ASSERT_SUCCESS(status); \

+ if (U_SUCCESS(status)){

+#define TEST_TEARDOWN \

+ } \

+ TEST_ASSERT_SUCCESS(status); \

+ uspoof_close(sc); \

+static void TestUSpoofCAPI(void);

+void addUSpoofTest(TestNode** root);

+void addUSpoofTest(TestNode** root)

+#if !UCONFIG_NO_FILE_IO

+ addTest(root, &TestUSpoofCAPI, "uspoof/TestUSpoofCAPI");

+#endif

+/*

+ * Identifiers for verifying that spoof checking is minimally alive and working.

+ */

+const UChar goodLatin[] = {(UChar)0x75, (UChar)0x7a, 0}; /* "uz", all ASCII */

+ /* (not confusable) */

+const UChar scMixed[] = {(UChar)0x73, (UChar)0x0441, 0}; /* "sc", with Cyrillic 'c' */

+ /* (mixed script, confusable) */

+const UChar scLatin[] = {(UChar)0x73, (UChar)0x63, 0}; /* "sc", plain ascii. */

+const UChar goodCyrl[] = {(UChar)0x438, (UChar)0x43B, 0}; /* Plain lower case Cyrillic letters,

+ no latin confusables */

+const UChar goodGreek[] = {(UChar)0x3c0, (UChar)0x3c6, 0}; /* Plain lower case Greek letters */

+const UChar lll_Latin_a[] = {(UChar)0x6c, (UChar)0x49, (UChar)0x31, 0}; /* lI1, all ASCII */

+ /* Full-width I, Small Roman Numeral fifty, Latin Cap Letter IOTA*/

+const UChar lll_Latin_b[] = {(UChar)0xff29, (UChar)0x217c, (UChar)0x196, 0};

+const UChar lll_Cyrl[] = {(UChar)0x0406, (UChar)0x04C0, (UChar)0x31, 0};

+/* The skeleton transform for all of these 'lll' lookalikes is all lower case l. */

+const UChar lll_Skel[] = {(UChar)0x6c, (UChar)0x6c, (UChar)0x6c, 0};

+/* Provide better code coverage */

+const char goodLatinUTF8[] = {0x75, 0x77, 0};

+/*

+ * Spoof Detection C API Tests

+ */

+static void TestUSpoofCAPI(void) {

+ /*

+ * basic uspoof_open().

+ */

+ {

+ USpoofChecker *sc;

+ UErrorCode status = U_ZERO_ERROR;

+ sc = uspoof_open(&status);

+ TEST_ASSERT_SUCCESS(status);

+ if (U_FAILURE(status)) {

+ /* If things are so broken that we can't even open a default spoof checker, */

+ /* don't even try the rest of the tests. They would all fail. */

+ return;

+ }

+ uspoof_close(sc);

+ }

+ /*

+ * Test Open from source rules.

+ */

+ TEST_SETUP

+ const char *dataSrcDir;

+ char *fileName;

+ char *confusables;

+ int confusablesLength;

+ char *confusablesWholeScript;

+ int confusablesWholeScriptLength;

+ FILE *f;

+ UParseError pe;

+ int32_t errType;

+ USpoofChecker *rsc;

+ dataSrcDir = ctest_dataSrcDir();

+ fileName = malloc(strlen(dataSrcDir) + 100);

+ strcpy(fileName, dataSrcDir);

+ strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt");

+ f = fopen(fileName, "r");

+ TEST_ASSERT_NE(f, NULL);

+ confusables = malloc(3000000);

+ confusablesLength = fread(confusables, 1, 3000000, f);

+ fclose(f);

+ strcpy(fileName, dataSrcDir);

+ strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt");

+ f = fopen(fileName, "r");

+ TEST_ASSERT_NE(f, NULL);

+ confusablesWholeScript = malloc(1000000);

+ confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f);

+ fclose(f);

+ rsc = uspoof_openFromSource(confusables, confusablesLength,

+ confusablesWholeScript, confusablesWholeScriptLength,

+ &errType, &pe, &status);

+ TEST_ASSERT_SUCCESS(status);

+ free(confusablesWholeScript);

+ free(confusables);

+ free(fileName);

+ uspoof_close(rsc);

+ /* printf("ParseError Line is %d\n", pe.line); */

+ TEST_TEARDOWN;

+ /*

+ * openFromSerialized and serialize

+ */

+ TEST_SETUP

+ int32_t serializedSize = 0;

+ int32_t actualLength = 0;

+ char *buf;

+ USpoofChecker *sc2;

+ int32_t checkResults;

+ serializedSize = uspoof_serialize(sc, NULL, 0, &status);

+ TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR);

+ TEST_ASSERT(serializedSize > 0);

+ /* Serialize the default spoof checker */

+ status = U_ZERO_ERROR;

+ buf = (char *)malloc(serializedSize + 10);

+ TEST_ASSERT(buf != NULL);

+ buf[serializedSize] = 42;

+ uspoof_serialize(sc, buf, serializedSize, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(42, buf[serializedSize]);

+ /* Create a new spoof checker from the freshly serialized data */

+ sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_NE(NULL, sc2);

+ TEST_ASSERT_EQ(serializedSize, actualLength);

+ /* Verify that the new spoof checker at least wiggles */

+ checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

+ uspoof_close(sc2);

+ free(buf);

+ TEST_TEARDOWN;

+ /*

+ * Set & Get Check Flags

+ */

+ TEST_SETUP

+ int32_t t;

+ uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status);

+ TEST_ASSERT_SUCCESS(status);

+ t = uspoof_getChecks(sc, &status);

+ TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS);

+ uspoof_setChecks(sc, 0, &status);

+ TEST_ASSERT_SUCCESS(status);

+ t = uspoof_getChecks(sc, &status);

+ TEST_ASSERT_EQ(0, t);

+ uspoof_setChecks(sc,

+ USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,

+ &status);

+ TEST_ASSERT_SUCCESS(status);

+ t = uspoof_getChecks(sc, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t);

+ TEST_TEARDOWN;

+ /*

+ * get & setAllowedChars

+ */

+ TEST_SETUP

+ USet *us;

+ const USet *uset;

+ uset = uspoof_getAllowedChars(sc, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT(uset_isFrozen(uset));

+ us = uset_open((UChar32)0x41, (UChar32)0x5A); /* [A-Z] */

+ uspoof_setAllowedChars(sc, us, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status));

+ TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status)));

+ TEST_ASSERT_SUCCESS(status);

+ uset_close(us);

+ TEST_TEARDOWN;

+ /*

+ * clone()

+ */

+ TEST_SETUP

+ USpoofChecker *clone1 = NULL;

+ USpoofChecker *clone2 = NULL;

+ int32_t checkResults = 0;

+ clone1 = uspoof_clone(sc, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_NE(clone1, sc);

+ clone2 = uspoof_clone(clone1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_NE(clone2, clone1);

+ uspoof_close(clone1);

+ /* Verify that the cloned spoof checker is alive */

+ checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

+ uspoof_close(clone2);

+ TEST_TEARDOWN;

+ /*

+ * get & set Checks

+ */

+ TEST_SETUP

+ int32_t checks;

+ int32_t checks2;

+ int32_t checkResults;

+ checks = uspoof_getChecks(sc, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks);

+ checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE);

+ uspoof_setChecks(sc, checks, &status);

+ TEST_ASSERT_SUCCESS(status);

+ checks2 = uspoof_getChecks(sc, &status);

+ TEST_ASSERT_EQ(checks, checks2);

+ /* The checks that were disabled just above are the same ones that the "scMixed" test fails.

+ So with those checks disabled, checking that identifier should now succeed. */

+ checkResults = uspoof_check(sc, scMixed, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ TEST_TEARDOWN;

+ /*

+ * AllowedLocales

+ */

+ TEST_SETUP

+ const char *allowedLocales;

+ int32_t checkResults;

+ /* Default allowed locales list should be empty */

+ allowedLocales = uspoof_getAllowedLocales(sc, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT(strcmp("", allowedLocales) == 0)

+ /* Allow en and ru, which should enable Latin and Cyrillic only to pass */

+ uspoof_setAllowedLocales(sc, "en, ru_RU", &status);

+ TEST_ASSERT_SUCCESS(status);

+ allowedLocales = uspoof_getAllowedLocales(sc, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT(strstr(allowedLocales, "en") != NULL);

+ TEST_ASSERT(strstr(allowedLocales, "ru") != NULL);

+ /* Limit checks to USPOOF_CHAR_LIMIT. Some of the test data has whole script confusables also,

+ * which we don't want to see in this test. */

+ uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status);

+ TEST_ASSERT_SUCCESS(status);

+ checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);

+ checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ /* Reset with an empty locale list, which should allow all characters to pass */

+ uspoof_setAllowedLocales(sc, " ", &status);

+ TEST_ASSERT_SUCCESS(status);

+ checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ TEST_TEARDOWN;

+ /*

+ * AllowedChars set/get the USet of allowed characters.

+ */

+ TEST_SETUP

+ const USet *set;

+ USet *tmpSet;

+ int32_t checkResults;

+ /* By default, we should see no restriction; the USet should allow all characters. */

+ set = uspoof_getAllowedChars(sc, &status);

+ TEST_ASSERT_SUCCESS(status);

+ tmpSet = uset_open(0, 0x10ffff);

+ TEST_ASSERT(uset_equals(tmpSet, set));

+ /* Setting the allowed chars should enable the check. */

+ uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status);

+ TEST_ASSERT_SUCCESS(status);

+ /* Remove a character that is in our good Latin test identifier from the allowed chars set. */

+ uset_remove(tmpSet, goodLatin[1]);

+ uspoof_setAllowedChars(sc, tmpSet, &status);

+ TEST_ASSERT_SUCCESS(status);

+ uset_close(tmpSet);

+ /* Latin Identifier should now fail; other non-latin test cases should still be OK */

+ checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);

+ checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);

+ TEST_TEARDOWN;

+ /*

+ * check UTF-8

+ */

+ TEST_SETUP

+ char utf8buf[200];

+ int32_t checkResults;

+ int32_t position;

+ u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ position = 666;

+ checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ TEST_ASSERT_EQ(666, position);

+ u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ position = 666;

+ checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults);

+ TEST_ASSERT_EQ(2, position);

+ TEST_TEARDOWN;

+ /*

+ * uspoof_areConfusable()

+ */

+ TEST_SETUP

+ int32_t checkResults;

+ checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

+ checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);

+ TEST_TEARDOWN;

+ /*

+ * areConfusableUTF8

+ */

+ TEST_SETUP

+ int32_t checkResults;

+ char s1[200];

+ char s2[200];

+ u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status);

+ u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

+ u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status);

+ u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, checkResults);

+ u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status);

+ u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);

+ TEST_TEARDOWN;

+ /*

+ * getSkeleton

+ */

+ TEST_SETUP

+ UChar dest[100];

+ int32_t skelLength;

+ skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status);

+ TEST_ASSERT_SUCCESS(status);

+ TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest));

+ TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength);

+ skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, (char*)dest,

+ sizeof(dest)/sizeof(UChar), &status);

+ TEST_ASSERT_SUCCESS(status);

+ skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status);

+ TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status);

+ TEST_ASSERT_EQ(3, skelLength);

+ status = U_ZERO_ERROR;

+ TEST_TEARDOWN;

+#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */

Property changes on: icu46/source/test/cintltst/spooftest.c

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/test/cintltst/sorttest.c ('k') | icu46/source/test/cintltst/spreptst.c » ('j') | no next file with comments »