9c92ab6191415 (Thomas Gleixner 2019-05-29 07:17:56 -0700 1) // SPDX-License-Identifier: GPL-2.0-only
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 2) /*
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 3) * Kernel module for testing utf-8 support.
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 4) *
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 5) * Copyright 2017 Collabora Ltd.
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 6) */
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 7)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 8) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 9)
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/stringify.h>
#include <linux/unicode.h>
#include <linux/dcache.h>

#include "utf8n.h"
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 16)
/*
 * Counters maintained by the test()/test_f() macros below.  They are only
 * used inside this file, so keep them out of the global namespace.
 */
static unsigned int failed_tests;
static unsigned int total_tests;

/* Tests will be based on this version. */
#define latest_maj 12
#define latest_min 1
#define latest_rev 0

/*
 * _test() - record one check and report it when it does not hold.
 * @cond: expression that is expected to be true
 * @func: name of the calling function, used in the report
 * @line: source line of the check, used in the report
 * @fmt:  string literal holding a printk-style detail message, "" for none
 * @...:  arguments consumed by @fmt
 *
 * @cond is parenthesized before it is negated, so any expression can be
 * passed; without the parentheses "a == b" would expand to "!a == b".
 * Whether a detail message exists is decided by looking at the first
 * character of @fmt: a string literal itself is never NULL, so testing the
 * pointer would always take the "message present" path.
 */
#define _test(cond, func, line, fmt, ...) do {				\
		total_tests++;						\
		if (!(cond)) {						\
			failed_tests++;					\
			pr_err("test %s:%d Failed: %s%s",		\
			       func, line, #cond,			\
			       ((fmt)[0] ? ":" : "."));			\
			if ((fmt)[0])					\
				pr_err(fmt, ##__VA_ARGS__);		\
		}							\
	} while (0)

/* test_f() - check @cond and print the formatted detail message on failure. */
#define test_f(cond, fmt, ...) \
	_test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__)

/* test() - check @cond without any detail message. */
#define test(cond) _test(cond, __func__, __LINE__, "")
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 37)
/*
 * Input/expected-output pairs for the decomposition tests.  Each entry is
 * also fed to utf8_strncmp() by check_utf8_comparisons(), which expects
 * both members to compare as equal.
 */
static const struct {
	/* Both members hold UTF-8 byte strings and _must_ be NUL-terminated. */
	unsigned char str[10];	/* sequence handed to the normalization code */
	unsigned char dec[10];	/* bytes check_utf8_nfdi() expects back */
} nfdi_test_data[] = {
	/* --- Trivial sequence --- */

	/* "aBba" decomposes to itself. */
	{
		.str = {'a', 'B', 'b', 'a', 0x00},
		.dec = {'a', 'B', 'b', 'a', 0x00},
	},

	/* --- Simple equivalent sequences --- */

	/*
	 * 'VULGAR FRACTION ONE QUARTER' cannot decompose to
	 * 'NUMBER 1' + 'FRACTION SLASH' + 'NUMBER 4' on canonical
	 * decomposition.
	 */
	{
		.str = {0xc2, 0xbc, 0x00},
		.dec = {0xc2, 0xbc, 0x00},
	},

	/*
	 * 'LATIN SMALL LETTER A WITH DIAERESIS' decomposes to
	 * 'LETTER A' + 'COMBINING DIAERESIS'.
	 */
	{
		.str = {0xc3, 0xa4, 0x00},
		.dec = {0x61, 0xcc, 0x88, 0x00},
	},

	/*
	 * 'LATIN SMALL LETTER LJ' can't decompose to
	 * 'LETTER L' + 'LETTER J' on canonical decomposition.
	 */
	{
		.str = {0xc7, 0x89, 0x00},
		.dec = {0xc7, 0x89, 0x00},
	},

	/* GREEK ANO TELEIA decomposes to MIDDLE DOT. */
	{
		.str = {0xce, 0x87, 0x00},
		.dec = {0xc2, 0xb7, 0x00},
	},

	/* --- Canonical ordering --- */

	/*
	 * A + 'COMBINING ACUTE ACCENT' + 'COMBINING OGONEK' decomposes
	 * to A + 'COMBINING OGONEK' + 'COMBINING ACUTE ACCENT'.
	 */
	{
		.str = {0x41, 0xcc, 0x81, 0xcc, 0xa8, 0x00},
		.dec = {0x41, 0xcc, 0xa8, 0xcc, 0x81, 0x00},
	},

	/*
	 * 'LATIN SMALL LETTER A WITH DIAERESIS' + 'COMBINING OGONEK'
	 * decomposes to
	 * 'LETTER A' + 'COMBINING OGONEK' + 'COMBINING DIAERESIS'.
	 */
	{
		.str = {0xc3, 0xa4, 0xcc, 0xa8, 0x00},
		.dec = {0x61, 0xcc, 0xa8, 0xcc, 0x88, 0x00},
	},
};
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 91)
/*
 * Input/expected-output pairs for the decomposition + case-folding tests.
 * Each entry is also fed to utf8_strncasecmp() by check_utf8_comparisons(),
 * which expects both members to compare as equal.
 */
static const struct {
	/* Both members hold UTF-8 byte strings and _must_ be NUL-terminated. */
	unsigned char str[30];	/* sequence handed to the normalization code */
	unsigned char ncf[30];	/* bytes check_utf8_nfdicf() expects back */
} nfdicf_test_data[] = {
	/* --- Trivial sequences --- */

	/* "ABba" folds to lowercase. */
	{
		.str = {'A', 'B', 'b', 'a', 0x00},
		.ncf = {'a', 'b', 'b', 'a', 0x00},
	},

	/* All ASCII folds to lower-case. */
	{
		.str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0.1",
		.ncf = "abcdefghijklmnopqrstuvwxyz0.1",
	},

	/*
	 * LATIN SMALL LETTER SHARP S folds to
	 * LATIN SMALL LETTER S + LATIN SMALL LETTER S.
	 */
	{
		.str = {0xc3, 0x9f, 0x00},
		.ncf = {0x73, 0x73, 0x00},
	},

	/*
	 * LATIN CAPITAL LETTER A WITH RING ABOVE folds to
	 * LATIN SMALL LETTER A + COMBINING RING ABOVE.
	 */
	{
		.str = {0xc3, 0x85, 0x00},
		.ncf = {0x61, 0xcc, 0x8a, 0x00},
	},

	/*
	 * --- Introduced by Unicode 8.0.0 ---
	 *
	 * Cherokee letters are interesting test-cases because they fold
	 * to upper-case.  Before 8.0.0, Cherokee lowercase were
	 * undefined, thus, the folding from LC is not stable between
	 * 7.0.0 -> 8.0.0, but it is from UC.
	 */

	/* CHEROKEE SMALL LETTER A folds to CHEROKEE LETTER A. */
	{
		.str = {0xea, 0xad, 0xb0, 0x00},
		.ncf = {0xe1, 0x8e, 0xa0, 0x00},
	},

	/* CHEROKEE SMALL LETTER YE folds to CHEROKEE LETTER YE. */
	{
		.str = {0xe1, 0x8f, 0xb8, 0x00},
		.ncf = {0xe1, 0x8f, 0xb0, 0x00},
	},

	/*
	 * OLD HUNGARIAN CAPITAL LETTER AMB folds to
	 * OLD HUNGARIAN SMALL LETTER AMB.
	 */
	{
		.str = {0xf0, 0x90, 0xb2, 0x83, 0x00},
		.ncf = {0xf0, 0x90, 0xb3, 0x83, 0x00},
	},

	/* --- Introduced by Unicode 9.0.0 --- */

	/* OSAGE CAPITAL LETTER CHA folds to OSAGE SMALL LETTER CHA. */
	{
		.str = {0xf0, 0x90, 0x92, 0xb5, 0x00},
		.ncf = {0xf0, 0x90, 0x93, 0x9d, 0x00},
	},

	/*
	 * LATIN CAPITAL LETTER SMALL CAPITAL I folds to
	 * LATIN LETTER SMALL CAPITAL I.
	 */
	{
		.str = {0xea, 0x9e, 0xae, 0x00},
		.ncf = {0xc9, 0xaa, 0x00},
	},

	/* --- Introduced by Unicode 11.0.0 --- */

	/*
	 * GEORGIAN MTAVRULI CAPITAL LETTER AN (U+1C90) folds to
	 * GEORGIAN LETTER AN (U+10D0).
	 */
	{
		.str = {0xe1, 0xb2, 0x90, 0x00},
		.ncf = {0xe1, 0x83, 0x90, 0x00},
	},
};
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 162)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 163) static void check_utf8_nfdi(void)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 164) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 165) int i;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 166) struct utf8cursor u8c;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 167) const struct utf8data *data;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 168)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 169) data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 170) if (!data) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 171) pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 172) __func__, latest_maj, latest_min, latest_rev);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 173) return;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 174) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 175)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 176) for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 177) int len = strlen(nfdi_test_data[i].str);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 178) int nlen = strlen(nfdi_test_data[i].dec);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 179) int j = 0;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 180) unsigned char c;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 181)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 182) test((utf8len(data, nfdi_test_data[i].str) == nlen));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 183) test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 184)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 185) if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 186) pr_err("can't create cursor\n");
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 187)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 188) while ((c = utf8byte(&u8c)) > 0) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 189) test_f((c == nfdi_test_data[i].dec[j]),
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 190) "Unexpected byte 0x%x should be 0x%x\n",
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 191) c, nfdi_test_data[i].dec[j]);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 192) j++;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 193) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 194)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 195) test((j == nlen));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 196) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 197) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 198)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 199) static void check_utf8_nfdicf(void)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 200) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 201) int i;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 202) struct utf8cursor u8c;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 203) const struct utf8data *data;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 204)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 205) data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 206) if (!data) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 207) pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 208) __func__, latest_maj, latest_min, latest_rev);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 209) return;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 210) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 211)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 212) for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 213) int len = strlen(nfdicf_test_data[i].str);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 214) int nlen = strlen(nfdicf_test_data[i].ncf);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 215) int j = 0;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 216) unsigned char c;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 217)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 218) test((utf8len(data, nfdicf_test_data[i].str) == nlen));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 219) test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 220)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 221) if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 222) pr_err("can't create cursor\n");
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 223)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 224) while ((c = utf8byte(&u8c)) > 0) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 225) test_f((c == nfdicf_test_data[i].ncf[j]),
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 226) "Unexpected byte 0x%x should be 0x%x\n",
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 227) c, nfdicf_test_data[i].ncf[j]);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 228) j++;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 229) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 230)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 231) test((j == nlen));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 232) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 233) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 234)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 235) static void check_utf8_comparisons(void)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 236) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 237) int i;
1215d239e791c (Gabriel Krisman Bertazi 2019-04-25 13:59:17 -0400 238) struct unicode_map *table = utf8_load("12.1.0");
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 239)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 240) if (IS_ERR(table)) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 241) pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 242) __func__, latest_maj, latest_min, latest_rev);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 243) return;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 244) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 245)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 246) for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 247) const struct qstr s1 = {.name = nfdi_test_data[i].str,
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 248) .len = sizeof(nfdi_test_data[i].str)};
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 249) const struct qstr s2 = {.name = nfdi_test_data[i].dec,
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 250) .len = sizeof(nfdi_test_data[i].dec)};
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 251)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 252) test_f(!utf8_strncmp(table, &s1, &s2),
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 253) "%s %s comparison mismatch\n", s1.name, s2.name);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 254) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 255)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 256) for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 257) const struct qstr s1 = {.name = nfdicf_test_data[i].str,
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 258) .len = sizeof(nfdicf_test_data[i].str)};
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 259) const struct qstr s2 = {.name = nfdicf_test_data[i].ncf,
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 260) .len = sizeof(nfdicf_test_data[i].ncf)};
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 261)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 262) test_f(!utf8_strncasecmp(table, &s1, &s2),
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 263) "%s %s comparison mismatch\n", s1.name, s2.name);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 264) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 265)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 266) utf8_unload(table);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 267) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 268)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 269) static void check_supported_versions(void)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 270) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 271) /* Unicode 7.0.0 should be supported. */
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 272) test(utf8version_is_supported(7, 0, 0));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 273)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 274) /* Unicode 9.0.0 should be supported. */
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 275) test(utf8version_is_supported(9, 0, 0));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 276)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 277) /* Unicode 1x.0.0 (the latest version) should be supported. */
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 278) test(utf8version_is_supported(latest_maj, latest_min, latest_rev));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 279)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 280) /* Next versions don't exist. */
1215d239e791c (Gabriel Krisman Bertazi 2019-04-25 13:59:17 -0400 281) test(!utf8version_is_supported(13, 0, 0));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 282) test(!utf8version_is_supported(0, 0, 0));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 283) test(!utf8version_is_supported(-1, -1, -1));
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 284) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 285)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 286) static int __init init_test_ucd(void)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 287) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 288) failed_tests = 0;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 289) total_tests = 0;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 290)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 291) check_supported_versions();
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 292) check_utf8_nfdi();
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 293) check_utf8_nfdicf();
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 294) check_utf8_comparisons();
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 295)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 296) if (!failed_tests)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 297) pr_info("All %u tests passed\n", total_tests);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 298) else
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 299) pr_err("%u out of %u tests failed\n", failed_tests,
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 300) total_tests);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 301) return 0;
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 302) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 303)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 304) static void __exit exit_test_ucd(void)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 305) {
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 306) }
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 307)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 308) module_init(init_test_ucd);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 309) module_exit(exit_test_ucd);
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 310)
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 311) MODULE_AUTHOR("Gabriel Krisman Bertazi <krisman@collabora.co.uk>");
f0d6cc00325b3 (Gabriel Krisman Bertazi 2019-04-25 13:56:01 -0400 312) MODULE_LICENSE("GPL");