VisionFive2 Linux kernel

StarFive Tech Linux Kernel for VisionFive (JH7110) boards (mirror)

More than 9999 Commits   32 Branches   54 Tags
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    1) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    2)  * Copyright (c) 2014 SGI.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    3)  * All rights reserved.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    4)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    5)  * This program is free software; you can redistribute it and/or
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    6)  * modify it under the terms of the GNU General Public License as
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    7)  * published by the Free Software Foundation.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    8)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400    9)  * This program is distributed in the hope that it would be useful,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   10)  * but WITHOUT ANY WARRANTY; without even the implied warranty of
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   11)  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   12)  * GNU General Public License for more details.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   13)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   14)  * You should have received a copy of the GNU General Public License
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   15)  * along with this program; if not, write the Free Software Foundation,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   16)  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   17)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   18) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   19) /* Generator for a compact trie for unicode normalization */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   20) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   21) #include <sys/types.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   22) #include <stddef.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   23) #include <stdlib.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   24) #include <stdio.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   25) #include <assert.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   26) #include <string.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   27) #include <unistd.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   28) #include <errno.h>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   29) 
/*
 * Default names of the input and output files.
 *
 * Each *_NAME macro holds a default file name, and the matching
 * *_name pointer is initialised to that default.
 */

#define AGE_NAME	"DerivedAge.txt"
const char	*age_name  = AGE_NAME;

#define CCC_NAME	"DerivedCombiningClass.txt"
const char	*ccc_name  = CCC_NAME;

#define PROP_NAME	"DerivedCoreProperties.txt"
const char	*prop_name = PROP_NAME;

#define DATA_NAME	"UnicodeData.txt"
const char	*data_name = DATA_NAME;

#define FOLD_NAME	"CaseFolding.txt"
const char	*fold_name = FOLD_NAME;

#define NORM_NAME	"NormalizationCorrections.txt"
const char	*norm_name = NORM_NAME;

#define TEST_NAME	"NormalizationTest.txt"
const char	*test_name = TEST_NAME;

#define UTF8_NAME	"utf8data.h"
const char	*utf8_name = UTF8_NAME;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   49) 
/*
 * Starts out as zero.  Presumably a verbosity level; the code that
 * reads it is not part of this section.
 */
int verbose = 0;

/*
 * LINESIZE is an arbitrary limit on the length of an input line.
 * line and buf0..buf3 are file-scope buffers of exactly that size.
 */
#define LINESIZE	1024
char line[LINESIZE];
char buf0[LINESIZE], buf1[LINESIZE], buf2[LINESIZE], buf3[LINESIZE];

/* Presumably the program name from argv[0]; it is assigned elsewhere. */
const char *argv0;

/* Number of elements in a true array; not meaningful for a pointer. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   64) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   65) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   66) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   67) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   68)  * Unicode version numbers consist of three parts: major, minor, and a
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   69)  * revision.  These numbers are packed into an unsigned int to obtain
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   70)  * a single version number.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   71)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   72)  * To save space in the generated trie, the unicode version is not
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   73)  * stored directly, instead we calculate a generation number from the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   74)  * unicode versions seen in the DerivedAge file, and use that as an
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   75)  * index into a table of unicode versions.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   76)  */
/* Bit positions of the major and minor fields inside a packed version. */
#define UNICODE_MAJ_SHIFT		(16)
#define UNICODE_MIN_SHIFT		(8)

/* Largest value each component may take. */
#define UNICODE_MAJ_MAX			((unsigned short)-1)
#define UNICODE_MIN_MAX			((unsigned char)-1)
#define UNICODE_REV_MAX			((unsigned char)-1)

/*
 * Pack (MAJ, MIN, REV) into a single unsigned int: the revision sits
 * in the low bits, the minor and major numbers are shifted above it.
 */
#define UNICODE_AGE(MAJ,MIN,REV)				\
	(((unsigned int)(REV)) |				\
	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |		\
	 ((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT))

/*
 * Presumably the table of unicode versions indexed by generation
 * number, together with its element count; both are set elsewhere.
 */
unsigned int *ages;
int ages_count;

/* Presumably the newest packed version seen; assigned elsewhere. */
unsigned int unicode_maxage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   93) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   94) static int age_valid(unsigned int major, unsigned int minor,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   95) 		     unsigned int revision)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   96) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   97) 	if (major > UNICODE_MAJ_MAX)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   98) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400   99) 	if (minor > UNICODE_MIN_MAX)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  100) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  101) 	if (revision > UNICODE_REV_MAX)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  102) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  103) 	return 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  104) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  105) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  106) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  107) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  108) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  109)  * utf8trie_t
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  110)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  111)  * A compact binary tree, used to decode UTF-8 characters.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  112)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  113)  * Internal nodes are one byte for the node itself, and up to three
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  114)  * bytes for an offset into the tree.  The first byte contains the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  115)  * following information:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  116)  *  NEXTBYTE  - flag        - advance to next byte if set
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  117)  *  BITNUM    - 3 bit field - the bit number to be tested
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  118)  *  OFFLEN    - 2 bit field - number of bytes in the offset
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  119)  * if offlen == 0 (non-branching node)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  120)  *  RIGHTPATH - 1 bit field - set if the following node is for the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  121)  *                            right-hand path (tested bit is set)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  122)  *  TRIENODE  - 1 bit field - set if the following node is an internal
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  123)  *                            node, otherwise it is a leaf node
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  124)  * if offlen != 0 (branching node)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  125)  *  LEFTNODE  - 1 bit field - set if the left-hand node is internal
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  126)  *  RIGHTNODE - 1 bit field - set if the right-hand node is internal
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  127)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  128)  * Due to the way utf8 works, there cannot be branching nodes with
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  129)  * NEXTBYTE set, and moreover those nodes always have a righthand
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  130)  * descendant.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  131)  */
/* Every byte of the trie is an unsigned char. */
typedef unsigned char utf8trie_t;

/* Fields found in the first byte of every internal node. */
#define BITNUM		0x07	/* the bit number to be tested */
#define NEXTBYTE	0x08	/* advance to next byte if set */
#define OFFLEN_SHIFT	4
#define OFFLEN		(0x03 << OFFLEN_SHIFT)	/* number of offset bytes */

/* Meaning of the top two bits when OFFLEN is zero (non-branching). */
#define RIGHTPATH	0x40	/* following node is the right-hand path */
#define TRIENODE	0x80	/* following node is an internal node */

/* Meaning of the same two bits when OFFLEN is non-zero (branching). */
#define RIGHTNODE	0x40	/* right-hand node is internal */
#define LEFTNODE	0x80	/* left-hand node is internal */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  141) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  142) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  143)  * utf8leaf_t
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  144)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  145)  * The leaves of the trie are embedded in the trie, and so the same
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  146)  * underlying datatype, unsigned char.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  147)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  148)  * leaf[0]: The unicode version, stored as a generation number that is
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  149)  *          an index into utf8agetab[].  With this we can filter code
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  150)  *          points based on the unicode version in which they were
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  151)  *          defined.  The CCC of a non-defined code point is 0.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  152)  * leaf[1]: Canonical Combining Class. During normalization, we need
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  153)  *          to do a stable sort into ascending order of all characters
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  154)  *          with a non-zero CCC that occur between two characters with
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  155)  *          a CCC of 0, or at the beginning or end of a string.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  156)  *          The unicode standard guarantees that all CCC values are
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  157)  *          between 0 and 254 inclusive, which leaves 255 available as
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  158)  *          a special value.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  159)  *          Code points with CCC 0 are known as stoppers.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  160)  * leaf[2]: Decomposition. If leaf[1] == 255, then leaf[2] is the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  161)  *          start of a NUL-terminated string that is the decomposition
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  162)  *          of the character.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  163)  *          The CCC of a decomposable character is the same as the CCC
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  164)  *          of the first character of its decomposition.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  165)  *          Some characters decompose as the empty string: these are
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  166)  *          characters with the Default_Ignorable_Code_Point property.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  167)  *          These do affect normalization, as they all have CCC 0.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  168)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  169)  * The decompositions in the trie have been fully expanded.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  170)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  171)  * Casefolding, if applicable, is also done using decompositions.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  172)  */
/* Leaves are embedded in the trie and share its underlying type. */
typedef unsigned char utf8leaf_t;

/* Accessors for the three parts of a leaf. */
#define LEAF_GEN(LEAF)	((LEAF)[0])			/* generation number */
#define LEAF_CCC(LEAF)	((LEAF)[1])			/* combining class */
#define LEAF_STR(LEAF)	((const char *)&(LEAF)[2])	/* decomposition */

/* Largest generation number. */
#define MAXGEN		(255)

/* Bounds of the CCC range, followed by the special CCC values. */
#define MINCCC		(0)
#define MAXCCC		(254)
#define STOPPER		(0)	/* CCC of a stopper */
#define DECOMPOSE	(255)	/* leaf[2] starts a decomposition string */

/*
 * NOTE(review): the code using HANGUL and UTF8HANGULLEAF is not part
 * of this section; presumably a marker byte for Hangul syllables and
 * the size of a buffer for a synthesised Hangul leaf.
 */
#define HANGUL		((char)(255))

#define UTF8HANGULLEAF	(12)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  188) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  189) struct tree;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400  190) static utf8leaf_t *utf8nlookup(struct tree *, unsigned char *,
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400  191) 			       const char *, size_t);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400  192) static utf8leaf_t *utf8lookup(struct tree *, unsigned char *, const char *);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  193) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  194) unsigned char *utf8data;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  195) size_t utf8data_size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  196) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  197) utf8trie_t *nfdi;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  198) utf8trie_t *nfdicf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  199) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  200) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  201) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  202) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  203)  * UTF8 valid ranges.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  204)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  205)  * The UTF-8 encoding spreads the bits of a 32bit word over several
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  206)  * bytes. This table gives the ranges that can be held and how they'd
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  207)  * be represented.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  208)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  209)  * 0x00000000 0x0000007F: 0xxxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  210)  * 0x00000000 0x000007FF: 110xxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  211)  * 0x00000000 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  212)  * 0x00000000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  213)  * 0x00000000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  214)  * 0x00000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  215)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  216)  * There is an additional requirement on UTF-8, in that only the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  217)  * shortest representation of a 32bit value is to be used.  A decoder
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  218)  * must not decode sequences that do not satisfy this requirement.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  219)  * Thus the allowed ranges have a lower bound.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  220)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  221)  * 0x00000000 0x0000007F: 0xxxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  222)  * 0x00000080 0x000007FF: 110xxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  223)  * 0x00000800 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  224)  * 0x00010000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  225)  * 0x00200000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  226)  * 0x04000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  227)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  228)  * Actual unicode characters are limited to the range 0x0 - 0x10FFFF,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  229)  * 17 planes of 65536 values.  This limits the sequences actually seen
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  230)  * even more, to just the following.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  231)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  232)  *          0 -     0x7f: 0                     0x7f
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  233)  *       0x80 -    0x7ff: 0xc2 0x80             0xdf 0xbf
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  234)  *      0x800 -   0xffff: 0xe0 0xa0 0x80        0xef 0xbf 0xbf
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  235)  *    0x10000 - 0x10ffff: 0xf0 0x90 0x80 0x80   0xf4 0x8f 0xbf 0xbf
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  236)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  237)  * Even within those ranges not all values are allowed: the surrogates
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  238)  * 0xd800 - 0xdfff should never be seen.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  239)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  240)  * Note that the longest sequence seen with valid usage is 4 bytes,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  241)  * the same as a single UTF-32 character.  This makes the UTF-8
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  242)  * representation of Unicode strictly smaller than UTF-32.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  243)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  244)  * The shortest sequence requirement was introduced by:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  245)  *    Corrigendum #1: UTF-8 Shortest Form
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  246)  * It can be found here:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  247)  *    http://www.unicode.org/versions/corrigendum1.html
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  248)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  249)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  250) 
/*
 * A byte b matches a pattern when (b & UTF8_x_MASK) == UTF8_x_BITS.
 */

/* Lead byte of a 2-byte sequence: 110xxxxx */
#define UTF8_2_BITS     0xC0
#define UTF8_2_MASK     0xE0

/* Lead byte of a 3-byte sequence: 1110xxxx */
#define UTF8_3_BITS     0xE0
#define UTF8_3_MASK     0xF0

/* Lead byte of a 4-byte sequence: 11110xxx */
#define UTF8_4_BITS     0xF0
#define UTF8_4_MASK     0xF8

/* Trailing byte of a multi-byte sequence: 10xxxxxx */
#define UTF8_N_BITS     0x80
#define UTF8_N_MASK     0xC0

/* The six value bits carried by a trailing byte. */
#define UTF8_V_MASK     0x3F
#define UTF8_V_SHIFT    6
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  261) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  262) static int utf8encode(char *str, unsigned int val)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  263) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  264) 	int len;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  265) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  266) 	if (val < 0x80) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  267) 		str[0] = val;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  268) 		len = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  269) 	} else if (val < 0x800) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  270) 		str[1] = val & UTF8_V_MASK;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  271) 		str[1] |= UTF8_N_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  272) 		val >>= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  273) 		str[0] = val;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  274) 		str[0] |= UTF8_2_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  275) 		len = 2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  276) 	} else if (val < 0x10000) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  277) 		str[2] = val & UTF8_V_MASK;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  278) 		str[2] |= UTF8_N_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  279) 		val >>= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  280) 		str[1] = val & UTF8_V_MASK;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  281) 		str[1] |= UTF8_N_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  282) 		val >>= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  283) 		str[0] = val;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  284) 		str[0] |= UTF8_3_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  285) 		len = 3;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  286) 	} else if (val < 0x110000) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  287) 		str[3] = val & UTF8_V_MASK;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  288) 		str[3] |= UTF8_N_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  289) 		val >>= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  290) 		str[2] = val & UTF8_V_MASK;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  291) 		str[2] |= UTF8_N_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  292) 		val >>= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  293) 		str[1] = val & UTF8_V_MASK;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  294) 		str[1] |= UTF8_N_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  295) 		val >>= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  296) 		str[0] = val;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  297) 		str[0] |= UTF8_4_BITS;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  298) 		len = 4;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  299) 	} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  300) 		printf("%#x: illegal val\n", val);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  301) 		len = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  302) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  303) 	return len;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  304) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  305) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  306) static unsigned int utf8decode(const char *str)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  307) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  308) 	const unsigned char *s = (const unsigned char*)str;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  309) 	unsigned int unichar = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  310) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  311) 	if (*s < 0x80) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  312) 		unichar = *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  313) 	} else if (*s < UTF8_3_BITS) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  314) 		unichar = *s++ & 0x1F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  315) 		unichar <<= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  316) 		unichar |= *s & 0x3F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  317) 	} else if (*s < UTF8_4_BITS) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  318) 		unichar = *s++ & 0x0F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  319) 		unichar <<= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  320) 		unichar |= *s++ & 0x3F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  321) 		unichar <<= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  322) 		unichar |= *s & 0x3F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  323) 	} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  324) 		unichar = *s++ & 0x0F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  325) 		unichar <<= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  326) 		unichar |= *s++ & 0x3F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  327) 		unichar <<= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  328) 		unichar |= *s++ & 0x3F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  329) 		unichar <<= UTF8_V_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  330) 		unichar |= *s & 0x3F;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  331) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  332) 	return unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  333) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  334) 
/*
 * Return nonzero when unichar lies in the range 0 - 0x10FFFF.
 * Only the upper bound is checked; surrogates are accepted.
 */
static int utf32valid(unsigned int unichar)
{
	return unichar <= 0x10FFFF;
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  339) 
/*
 * True when U lies in 0xAC00 - 0xD7A3 inclusive.
 * Note that U may be evaluated twice.
 */
#define HANGUL_SYLLABLE(U)	(0xAC00 <= (U) && (U) <= 0xD7A3)

/* Tags telling whether a child pointer refers to a node or to a leaf. */
#define NODE 1
#define LEAF 0
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  344) 
/*
 * One lookup tree together with the callbacks used to handle its
 * leaves.
 */
struct tree {
	/* Root of the tree: a leaf if childnode is LEAF, else a struct node. */
	void *root;
	/* NODE or LEAF, describing what root points at. */
	int childnode;
	/* type and maxage are printed together as "<type>_<maxage>" by tree_walk(). */
	const char *type;
	unsigned int maxage;
	/* Link to another tree; NOTE(review): list ordering not visible here. */
	struct tree *next;
	/*
	 * Leaf callbacks.  Only leaf_print is called in this part of the
	 * file, as leaf_print(leaf, indent); the contracts of the others
	 * should be confirmed against their implementations.
	 */
	int (*leaf_equal)(void *, void *);
	void (*leaf_print)(void *, int);
	int (*leaf_mark)(void *);
	int (*leaf_size)(void *);
	int *(*leaf_index)(struct tree *, void *);
	unsigned char *(*leaf_emit)(void *, unsigned char *);
	/* 0x110000 entries; presumably one per code point -- TODO confirm. */
	int leafindex[0x110000];
	int index;
};
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  360) 
/*
 * Internal node of a tree.  Each node tests a single bit of the key
 * and branches left (bit clear) or right (bit set).
 */
struct node {
	/* Bookkeeping; alloc_node() starts these at 0, -1, 0 and 4. */
	int index;
	int offset;
	int mark;
	int size;
	/* Enclosing node, NULL for the root. */
	struct node *parent;
	/* Children; whether each is a node or a leaf is given by leftnode/rightnode. */
	void *left;
	void *right;
	/* Bit tested at this node; the low three bits pick the bit within a key byte. */
	unsigned char bitnum;
	/* Nonzero when the key must advance one byte before testing the bit. */
	unsigned char nextbyte;
	/* NODE or LEAF tag for left and right respectively. */
	unsigned char leftnode;
	unsigned char rightnode;
	/* Start at 0 and are printed by tree_walk(); NOTE(review): meaning not visible here. */
	unsigned int keybits;
	unsigned int keymask;
};
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  376) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  377) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  378)  * Example lookup function for a tree.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  379)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  380) static void *lookup(struct tree *tree, const char *key)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  381) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  382) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  383) 	void *leaf = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  384) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  385) 	node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  386) 	while (!leaf && node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  387) 		if (node->nextbyte)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  388) 			key++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  389) 		if (*key & (1 << (node->bitnum & 7))) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  390) 			/* Right leg */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  391) 			if (node->rightnode == NODE) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  392) 				node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  393) 			} else if (node->rightnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  394) 				leaf = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  395) 			} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  396) 				node = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  397) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  398) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  399) 			/* Left leg */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  400) 			if (node->leftnode == NODE) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  401) 				node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  402) 			} else if (node->leftnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  403) 				leaf = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  404) 			} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  405) 				node = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  406) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  407) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  408) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  409) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  410) 	return leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  411) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  412) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  413) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  414)  * A simple non-recursive tree walker: keep track of visits to the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  415)  * left and right branches in the leftmask and rightmask.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  416)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  417) static void tree_walk(struct tree *tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  418) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  419) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  420) 	unsigned int leftmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  421) 	unsigned int rightmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  422) 	unsigned int bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  423) 	int indent = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  424) 	int nodes, singletons, leaves;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  425) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  426) 	nodes = singletons = leaves = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  427) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  428) 	printf("%s_%x root %p\n", tree->type, tree->maxage, tree->root);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  429) 	if (tree->childnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  430) 		assert(tree->root);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  431) 		tree->leaf_print(tree->root, indent);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  432) 		leaves = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  433) 	} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  434) 		assert(tree->childnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  435) 		node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  436) 		leftmask = rightmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  437) 		while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  438) 			printf("%*snode @ %p bitnum %d nextbyte %d"
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  439) 			       " left %p right %p mask %x bits %x\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  440) 				indent, "", node,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  441) 				node->bitnum, node->nextbyte,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  442) 				node->left, node->right,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  443) 				node->keymask, node->keybits);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  444) 			nodes += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  445) 			if (!(node->left && node->right))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  446) 				singletons += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  447) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  448) 			while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  449) 				bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  450) 				if ((leftmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  451) 					leftmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  452) 					if (node->leftnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  453) 						assert(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  454) 						tree->leaf_print(node->left,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  455) 								 indent+1);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  456) 						leaves += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  457) 					} else if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  458) 						assert(node->leftnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  459) 						indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  460) 						node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  461) 						break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  462) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  463) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  464) 				if ((rightmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  465) 					rightmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  466) 					if (node->rightnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  467) 						assert(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  468) 						tree->leaf_print(node->right,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  469) 								 indent+1);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  470) 						leaves += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  471) 					} else if (node->right) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400  472) 						assert(node->rightnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  473) 						indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  474) 						node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  475) 						break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  476) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  477) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  478) 				leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  479) 				rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  480) 				node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  481) 				indent -= 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  482) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  483) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  484) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  485) 	printf("nodes %d leaves %d singletons %d\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  486) 	       nodes, leaves, singletons);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  487) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  488) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  489) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  490)  * Allocate an initialize a new internal node.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  491)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  492) static struct node *alloc_node(struct node *parent)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  493) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  494) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  495) 	int bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  496) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  497) 	node = malloc(sizeof(*node));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  498) 	node->left = node->right = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  499) 	node->parent = parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  500) 	node->leftnode = NODE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  501) 	node->rightnode = NODE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  502) 	node->keybits = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  503) 	node->keymask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  504) 	node->mark = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  505) 	node->index = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  506) 	node->offset = -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  507) 	node->size = 4;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  508) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  509) 	if (node->parent) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  510) 		bitnum = parent->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  511) 		if ((bitnum & 7) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  512) 			node->bitnum = bitnum + 7 + 8;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  513) 			node->nextbyte = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  514) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  515) 			node->bitnum = bitnum - 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  516) 			node->nextbyte = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  517) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  518) 	} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  519) 		node->bitnum = 7;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  520) 		node->nextbyte = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  521) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  522) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  523) 	return node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  524) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  525) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  526) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  527)  * Insert a new leaf into the tree, and collapse any subtrees that are
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  528)  * fully populated and end in identical leaves. A nextbyte tagged
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  529)  * internal node will not be removed to preserve the tree's integrity.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  530)  * Note that due to the structure of utf8, no nextbyte tagged node
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  531)  * will be a candidate for removal.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  532)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  533) static int insert(struct tree *tree, char *key, int keylen, void *leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  534) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  535) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  536) 	struct node *parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  537) 	void **cursor;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  538) 	int keybits;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  539) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  540) 	assert(keylen >= 1 && keylen <= 4);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  541) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  542) 	node = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  543) 	cursor = &tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  544) 	keybits = 8 * keylen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  545) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  546) 	/* Insert, creating path along the way. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  547) 	while (keybits) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  548) 		if (!*cursor)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  549) 			*cursor = alloc_node(node);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  550) 		node = *cursor;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  551) 		if (node->nextbyte)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  552) 			key++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  553) 		if (*key & (1 << (node->bitnum & 7)))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  554) 			cursor = &node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  555) 		else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  556) 			cursor = &node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  557) 		keybits--;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  558) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  559) 	*cursor = leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  560) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  561) 	/* Merge subtrees if possible. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  562) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  563) 		if (*key & (1 << (node->bitnum & 7)))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  564) 			node->rightnode = LEAF;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  565) 		else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  566) 			node->leftnode = LEAF;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  567) 		if (node->nextbyte)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  568) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  569) 		if (node->leftnode == NODE || node->rightnode == NODE)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  570) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  571) 		assert(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  572) 		assert(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  573) 		/* Compare */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  574) 		if (! tree->leaf_equal(node->left, node->right))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  575) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  576) 		/* Keep left, drop right leaf. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  577) 		leaf = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  578) 		/* Check in parent */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  579) 		parent = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  580) 		if (!parent) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  581) 			/* root of tree! */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  582) 			tree->root = leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  583) 			tree->childnode = LEAF;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  584) 		} else if (parent->left == node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  585) 			parent->left = leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  586) 			parent->leftnode = LEAF;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  587) 			if (parent->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  588) 				parent->keymask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  589) 				parent->keybits = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  590) 			} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  591) 				parent->keymask |= (1 << node->bitnum);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  592) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  593) 		} else if (parent->right == node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  594) 			parent->right = leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  595) 			parent->rightnode = LEAF;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  596) 			if (parent->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  597) 				parent->keymask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  598) 				parent->keybits = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  599) 			} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  600) 				parent->keymask |= (1 << node->bitnum);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  601) 				parent->keybits |= (1 << node->bitnum);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  602) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  603) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  604) 			/* internal tree error */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  605) 			assert(0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  606) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  607) 		free(node);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  608) 		node = parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  609) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  610) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  611) 	/* Propagate keymasks up along singleton chains. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  612) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  613) 		parent = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  614) 		if (!parent)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  615) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  616) 		/* Nix the mask for parents with two children. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  617) 		if (node->keymask == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  618) 			parent->keymask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  619) 			parent->keybits = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  620) 		} else if (parent->left && parent->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  621) 			parent->keymask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  622) 			parent->keybits = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  623) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  624) 			assert((parent->keymask & node->keymask) == 0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  625) 			parent->keymask |= node->keymask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  626) 			parent->keymask |= (1 << parent->bitnum);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  627) 			parent->keybits |= node->keybits;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  628) 			if (parent->right)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  629) 				parent->keybits |= (1 << parent->bitnum);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  630) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  631) 		node = parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  632) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  633) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  634) 	return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  635) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  636) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  637) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  638)  * Prune internal nodes.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  639)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  640)  * Fully populated subtrees that end at the same leaf have already
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  641)  * been collapsed.  There are still internal nodes that have for both
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  642)  * their left and right branches a sequence of singletons that make
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  643)  * identical choices and end in identical leaves.  The keymask and
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  644)  * keybits collected in the nodes describe the choices made in these
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  645)  * singleton chains.  When they are identical for the left and right
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  646)  * branch of a node, and the two leaves comare identical, the node in
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  647)  * question can be removed.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  648)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  649)  * Note that nodes with the nextbyte tag set will not be removed by
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  650)  * this to ensure tree integrity.  Note as well that the structure of
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  651)  * utf8 ensures that these nodes would not have been candidates for
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  652)  * removal in any case.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  653)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  654) static void prune(struct tree *tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  655) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  656) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  657) 	struct node *left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  658) 	struct node *right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  659) 	struct node *parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  660) 	void *leftleaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  661) 	void *rightleaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  662) 	unsigned int leftmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  663) 	unsigned int rightmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  664) 	unsigned int bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  665) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  666) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  667) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  668) 		printf("Pruning %s_%x\n", tree->type, tree->maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  669) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  670) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  671) 	if (tree->childnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  672) 		return;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  673) 	if (!tree->root)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  674) 		return;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  675) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  676) 	leftmask = rightmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  677) 	node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  678) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  679) 		if (node->nextbyte)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  680) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  681) 		if (node->leftnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  682) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  683) 		if (node->rightnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  684) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  685) 		if (!node->left)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  686) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  687) 		if (!node->right)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  688) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  689) 		left = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  690) 		right = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  691) 		if (left->keymask == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  692) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  693) 		if (right->keymask == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  694) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  695) 		if (left->keymask != right->keymask)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  696) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  697) 		if (left->keybits != right->keybits)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  698) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  699) 		leftleaf = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  700) 		while (!leftleaf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  701) 			assert(left->left || left->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  702) 			if (left->leftnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  703) 				leftleaf = left->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  704) 			else if (left->rightnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  705) 				leftleaf = left->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  706) 			else if (left->left)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  707) 				left = left->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  708) 			else if (left->right)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  709) 				left = left->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  710) 			else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  711) 				assert(0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  712) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  713) 		rightleaf = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  714) 		while (!rightleaf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  715) 			assert(right->left || right->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  716) 			if (right->leftnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  717) 				rightleaf = right->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  718) 			else if (right->rightnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  719) 				rightleaf = right->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  720) 			else if (right->left)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  721) 				right = right->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  722) 			else if (right->right)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  723) 				right = right->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  724) 			else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  725) 				assert(0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  726) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  727) 		if (! tree->leaf_equal(leftleaf, rightleaf))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  728) 			goto advance;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  729) 		/*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  730) 		 * This node has identical singleton-only subtrees.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  731) 		 * Remove it.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  732) 		 */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  733) 		parent = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  734) 		left = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  735) 		right = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  736) 		if (parent->left == node)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  737) 			parent->left = left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  738) 		else if (parent->right == node)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  739) 			parent->right = left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  740) 		else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  741) 			assert(0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  742) 		left->parent = parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  743) 		left->keymask |= (1 << node->bitnum);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  744) 		node->left = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  745) 		while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  746) 			bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  747) 			leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  748) 			rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  749) 			if (node->leftnode == NODE && node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  750) 				left = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  751) 				free(node);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  752) 				count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  753) 				node = left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  754) 			} else if (node->rightnode == NODE && node->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  755) 				right = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  756) 				free(node);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  757) 				count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  758) 				node = right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  759) 			} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  760) 				node = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  761) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  762) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  763) 		/* Propagate keymasks up along singleton chains. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  764) 		node = parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  765) 		/* Force re-check */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  766) 		bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  767) 		leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  768) 		rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  769) 		for (;;) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  770) 			if (node->left && node->right)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  771) 				break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  772) 			if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  773) 				left = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  774) 				node->keymask |= left->keymask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  775) 				node->keybits |= left->keybits;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  776) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  777) 			if (node->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  778) 				right = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  779) 				node->keymask |= right->keymask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  780) 				node->keybits |= right->keybits;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  781) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  782) 			node->keymask |= (1 << node->bitnum);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  783) 			node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  784) 			/* Force re-check */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  785) 			bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  786) 			leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  787) 			rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  788) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  789) 	advance:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  790) 		bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  791) 		if ((leftmask & bitmask) == 0 &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  792) 		    node->leftnode == NODE &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  793) 		    node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  794) 			leftmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  795) 			node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  796) 		} else if ((rightmask & bitmask) == 0 &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  797) 			   node->rightnode == NODE &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  798) 			   node->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  799) 			rightmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  800) 			node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  801) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  802) 			leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  803) 			rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  804) 			node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  805) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  806) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  807) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  808) 		printf("Pruned %d nodes\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  809) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  810) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  811) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  812)  * Mark the nodes in the tree that lead to leaves that must be
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  813)  * emitted.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  814)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  815) static void mark_nodes(struct tree *tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  816) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  817) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  818) 	struct node *n;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  819) 	unsigned int leftmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  820) 	unsigned int rightmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  821) 	unsigned int bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  822) 	int marked;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  823) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  824) 	marked = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  825) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  826) 		printf("Marking %s_%x\n", tree->type, tree->maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  827) 	if (tree->childnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  828) 		goto done;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  829) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  830) 	assert(tree->childnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  831) 	node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  832) 	leftmask = rightmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  833) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  834) 		bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  835) 		if ((leftmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  836) 			leftmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  837) 			if (node->leftnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  838) 				assert(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  839) 				if (tree->leaf_mark(node->left)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  840) 					n = node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  841) 					while (n && !n->mark) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  842) 						marked++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  843) 						n->mark = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  844) 						n = n->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  845) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  846) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  847) 			} else if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  848) 				assert(node->leftnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  849) 				node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  850) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  851) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  852) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  853) 		if ((rightmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  854) 			rightmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  855) 			if (node->rightnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  856) 				assert(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  857) 				if (tree->leaf_mark(node->right)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  858) 					n = node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  859) 					while (n && !n->mark) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  860) 						marked++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  861) 						n->mark = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  862) 						n = n->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  863) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  864) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  865) 			} else if (node->right) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400  866) 				assert(node->rightnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  867) 				node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  868) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  869) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  870) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  871) 		leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  872) 		rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  873) 		node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  874) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  875) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  876) 	/* second pass: left siblings and singletons */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  877) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  878) 	assert(tree->childnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  879) 	node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  880) 	leftmask = rightmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  881) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  882) 		bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  883) 		if ((leftmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  884) 			leftmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  885) 			if (node->leftnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  886) 				assert(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  887) 				if (tree->leaf_mark(node->left)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  888) 					n = node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  889) 					while (n && !n->mark) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  890) 						marked++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  891) 						n->mark = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  892) 						n = n->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  893) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  894) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  895) 			} else if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  896) 				assert(node->leftnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  897) 				node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  898) 				if (!node->mark && node->parent->mark) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  899) 					marked++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  900) 					node->mark = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  901) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  902) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  903) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  904) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  905) 		if ((rightmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  906) 			rightmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  907) 			if (node->rightnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  908) 				assert(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  909) 				if (tree->leaf_mark(node->right)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  910) 					n = node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  911) 					while (n && !n->mark) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  912) 						marked++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  913) 						n->mark = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  914) 						n = n->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  915) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  916) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  917) 			} else if (node->right) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400  918) 				assert(node->rightnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  919) 				node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  920) 				if (!node->mark && node->parent->mark &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  921) 				    !node->parent->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  922) 					marked++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  923) 					node->mark = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  924) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  925) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  926) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  927) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  928) 		leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  929) 		rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  930) 		node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  931) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  932) done:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  933) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  934) 		printf("Marked %d nodes\n", marked);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  935) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  936) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  937) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  938)  * Compute the index of each node and leaf, which is the offset in the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  939)  * emitted trie.  These values must be pre-computed because relative
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  940)  * offsets between nodes are used to navigate the tree.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  941)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  942) static int index_nodes(struct tree *tree, int index)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  943) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  944) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  945) 	unsigned int leftmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  946) 	unsigned int rightmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  947) 	unsigned int bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  948) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  949) 	int indent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  950) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  951) 	/* Align to a cache line (or half a cache line?). */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  952) 	while (index % 64)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  953) 		index++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  954) 	tree->index = index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  955) 	indent = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  956) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  957) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  958) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  959) 		printf("Indexing %s_%x: %d\n", tree->type, tree->maxage, index);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  960) 	if (tree->childnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  961) 		index += tree->leaf_size(tree->root);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  962) 		goto done;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  963) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  964) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  965) 	assert(tree->childnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  966) 	node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  967) 	leftmask = rightmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  968) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  969) 		if (!node->mark)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  970) 			goto skip;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  971) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  972) 		if (node->index != index)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  973) 			node->index = index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  974) 		index += node->size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  975) skip:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  976) 		while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  977) 			bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  978) 			if (node->mark && (leftmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  979) 				leftmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  980) 				if (node->leftnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  981) 					assert(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  982) 					*tree->leaf_index(tree, node->left) =
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  983) 									index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  984) 					index += tree->leaf_size(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  985) 					count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  986) 				} else if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  987) 					assert(node->leftnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  988) 					indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  989) 					node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  990) 					break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  991) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  992) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  993) 			if (node->mark && (rightmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  994) 				rightmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  995) 				if (node->rightnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  996) 					assert(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  997) 					*tree->leaf_index(tree, node->right) = index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  998) 					index += tree->leaf_size(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400  999) 					count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1000) 				} else if (node->right) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1001) 					assert(node->rightnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1002) 					indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1003) 					node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1004) 					break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1005) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1006) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1007) 			leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1008) 			rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1009) 			node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1010) 			indent -= 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1011) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1012) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1013) done:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1014) 	/* Round up to a multiple of 16 */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1015) 	while (index % 16)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1016) 		index++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1017) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1018) 		printf("Final index %d\n", index);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1019) 	return index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1020) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1021) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1022) /*
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1023)  * Mark the nodes in a subtree, helper for size_nodes().
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1024)  */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1025) static int mark_subtree(struct node *node)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1026) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1027) 	int changed;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1028) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1029) 	if (!node || node->mark)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1030) 		return 0;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1031) 	node->mark = 1;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1032) 	node->index = node->parent->index;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1033) 	changed = 1;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1034) 	if (node->leftnode == NODE)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1035) 		changed += mark_subtree(node->left);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1036) 	if (node->rightnode == NODE)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1037) 		changed += mark_subtree(node->right);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1038) 	return changed;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1039) }
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1040) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1041) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1042)  * Compute the size of nodes and leaves. We start by assuming that
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1043)  * each node needs to store a three-byte offset. The indexes of the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1044)  * nodes are calculated based on that, and then this function is
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1045)  * called to see if the sizes of some nodes can be reduced.  This is
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1046)  * repeated until no more changes are seen.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1047)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1048) static int size_nodes(struct tree *tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1049) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1050) 	struct tree *next;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1051) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1052) 	struct node *right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1053) 	struct node *n;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1054) 	unsigned int leftmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1055) 	unsigned int rightmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1056) 	unsigned int bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1057) 	unsigned int pathbits;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1058) 	unsigned int pathmask;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1059) 	unsigned int nbit;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1060) 	int changed;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1061) 	int offset;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1062) 	int size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1063) 	int indent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1064) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1065) 	indent = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1066) 	changed = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1067) 	size = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1068) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1069) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1070) 		printf("Sizing %s_%x\n", tree->type, tree->maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1071) 	if (tree->childnode == LEAF)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1072) 		goto done;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1073) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1074) 	assert(tree->childnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1075) 	pathbits = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1076) 	pathmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1077) 	node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1078) 	leftmask = rightmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1079) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1080) 		if (!node->mark)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1081) 			goto skip;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1082) 		offset = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1083) 		if (!node->left || !node->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1084) 			size = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1085) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1086) 			if (node->rightnode == NODE) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1087) 				/*
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1088) 				 * If the right node is not marked,
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1089) 				 * look for a corresponding node in
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1090) 				 * the next tree.  Such a node need
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1091) 				 * not exist.
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1092) 				 */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1093) 				right = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1094) 				next = tree->next;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1095) 				while (!right->mark) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1096) 					assert(next);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1097) 					n = next->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1098) 					while (n->bitnum != node->bitnum) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1099) 						nbit = 1 << n->bitnum;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1100) 						if (!(pathmask & nbit))
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1101) 							break;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1102) 						if (pathbits & nbit) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1103) 							if (n->rightnode == LEAF)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1104) 								break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1105) 							n = n->right;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1106) 						} else {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1107) 							if (n->leftnode == LEAF)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1108) 								break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1109) 							n = n->left;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1110) 						}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1111) 					}
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1112) 					if (n->bitnum != node->bitnum)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1113) 						break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1114) 					n = n->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1115) 					right = n;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1116) 					next = next->next;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1117) 				}
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1118) 				/* Make sure the right node is marked. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1119) 				if (!right->mark)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1120) 					changed += mark_subtree(right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1121) 				offset = right->index - node->index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1122) 			} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1123) 				offset = *tree->leaf_index(tree, node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1124) 				offset -= node->index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1125) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1126) 			assert(offset >= 0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1127) 			assert(offset <= 0xffffff);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1128) 			if (offset <= 0xff) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1129) 				size = 2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1130) 			} else if (offset <= 0xffff) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1131) 				size = 3;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1132) 			} else { /* offset <= 0xffffff */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1133) 				size = 4;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1134) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1135) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1136) 		if (node->size != size || node->offset != offset) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1137) 			node->size = size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1138) 			node->offset = offset;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1139) 			changed++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1140) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1141) skip:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1142) 		while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1143) 			bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1144) 			pathmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1145) 			if (node->mark && (leftmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1146) 				leftmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1147) 				if (node->leftnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1148) 					assert(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1149) 				} else if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1150) 					assert(node->leftnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1151) 					indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1152) 					node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1153) 					break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1154) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1155) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1156) 			if (node->mark && (rightmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1157) 				rightmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1158) 				pathbits |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1159) 				if (node->rightnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1160) 					assert(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1161) 				} else if (node->right) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1162) 					assert(node->rightnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1163) 					indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1164) 					node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1165) 					break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1166) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1167) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1168) 			leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1169) 			rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1170) 			pathmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1171) 			pathbits &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1172) 			node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1173) 			indent -= 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1174) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1175) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1176) done:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1177) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1178) 		printf("Found %d changes\n", changed);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1179) 	return changed;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1180) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1181) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1182) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1183)  * Emit a trie for the given tree into the data array.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1184)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1185) static void emit(struct tree *tree, unsigned char *data)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1186) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1187) 	struct node *node;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1188) 	unsigned int leftmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1189) 	unsigned int rightmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1190) 	unsigned int bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1191) 	int offlen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1192) 	int offset;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1193) 	int index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1194) 	int indent;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1195) 	int size;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1196) 	int bytes;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1197) 	int leaves;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1198) 	int nodes[4];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1199) 	unsigned char byte;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1200) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1201) 	nodes[0] = nodes[1] = nodes[2] = nodes[3] = 0;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1202) 	leaves = 0;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1203) 	bytes = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1204) 	index = tree->index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1205) 	data += index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1206) 	indent = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1207) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1208) 		printf("Emitting %s_%x\n", tree->type, tree->maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1209) 	if (tree->childnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1210) 		assert(tree->root);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1211) 		tree->leaf_emit(tree->root, data);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1212) 		size = tree->leaf_size(tree->root);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1213) 		index += size;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1214) 		leaves++;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1215) 		goto done;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1216) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1217) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1218) 	assert(tree->childnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1219) 	node = tree->root;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1220) 	leftmask = rightmask = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1221) 	while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1222) 		if (!node->mark)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1223) 			goto skip;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1224) 		assert(node->offset != -1);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1225) 		assert(node->index == index);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1226) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1227) 		byte = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1228) 		if (node->nextbyte)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1229) 			byte |= NEXTBYTE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1230) 		byte |= (node->bitnum & BITNUM);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1231) 		if (node->left && node->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1232) 			if (node->leftnode == NODE)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1233) 				byte |= LEFTNODE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1234) 			if (node->rightnode == NODE)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1235) 				byte |= RIGHTNODE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1236) 			if (node->offset <= 0xff)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1237) 				offlen = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1238) 			else if (node->offset <= 0xffff)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1239) 				offlen = 2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1240) 			else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1241) 				offlen = 3;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1242) 			nodes[offlen]++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1243) 			offset = node->offset;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1244) 			byte |= offlen << OFFLEN_SHIFT;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1245) 			*data++ = byte;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1246) 			index++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1247) 			while (offlen--) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1248) 				*data++ = offset & 0xff;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1249) 				index++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1250) 				offset >>= 8;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1251) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1252) 		} else if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1253) 			if (node->leftnode == NODE)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1254) 				byte |= TRIENODE;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1255) 			nodes[0]++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1256) 			*data++ = byte;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1257) 			index++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1258) 		} else if (node->right) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1259) 			byte |= RIGHTNODE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1260) 			if (node->rightnode == NODE)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1261) 				byte |= TRIENODE;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1262) 			nodes[0]++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1263) 			*data++ = byte;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1264) 			index++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1265) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1266) 			assert(0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1267) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1268) skip:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1269) 		while (node) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1270) 			bitmask = 1 << node->bitnum;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1271) 			if (node->mark && (leftmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1272) 				leftmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1273) 				if (node->leftnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1274) 					assert(node->left);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1275) 					data = tree->leaf_emit(node->left,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1276) 							       data);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1277) 					size = tree->leaf_size(node->left);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1278) 					index += size;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1279) 					bytes += size;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1280) 					leaves++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1281) 				} else if (node->left) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1282) 					assert(node->leftnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1283) 					indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1284) 					node = node->left;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1285) 					break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1286) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1287) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1288) 			if (node->mark && (rightmask & bitmask) == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1289) 				rightmask |= bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1290) 				if (node->rightnode == LEAF) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1291) 					assert(node->right);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1292) 					data = tree->leaf_emit(node->right,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1293) 							       data);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1294) 					size = tree->leaf_size(node->right);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1295) 					index += size;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1296) 					bytes += size;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1297) 					leaves++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1298) 				} else if (node->right) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1299) 					assert(node->rightnode == NODE);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1300) 					indent += 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1301) 					node = node->right;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1302) 					break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1303) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1304) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1305) 			leftmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1306) 			rightmask &= ~bitmask;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1307) 			node = node->parent;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1308) 			indent -= 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1309) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1310) 	}
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1311) done:
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1312) 	if (verbose > 0) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1313) 		printf("Emitted %d (%d) leaves",
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1314) 			leaves, bytes);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1315) 		printf(" %d (%d+%d+%d+%d) nodes",
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1316) 			nodes[0] + nodes[1] + nodes[2] + nodes[3],
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1317) 			nodes[0], nodes[1], nodes[2], nodes[3]);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1318) 		printf(" %d total\n", index - tree->index);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1319) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1320) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1321) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1322) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1323) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1324) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1325)  * Unicode data.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1326)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1327)  * We need to keep track of the Canonical Combining Class, the Age,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1328)  * and decompositions for a code point.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1329)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1330)  * For the Age, we store the index into the ages table.  Effectively
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1331)  * this is a generation number that the table maps to a unicode
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1332)  * version.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1333)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1334)  * The correction field is used to indicate that this entry is in the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1335)  * corrections array, which contains decompositions that were
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1336)  * corrected in later revisions.  The value of the correction field is
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1337)  * the Unicode version in which the mapping was corrected.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1338)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1339) struct unicode_data {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1340) 	unsigned int code;	/* code point; the key matched by corrections_lookup() */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1341) 	int ccc;	/* Canonical Combining Class */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1342) 	int gen;	/* Age, stored as an index into the ages table */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1343) 	int correction;	/* Unicode version in which the mapping was corrected */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1344) 	unsigned int *utf32nfdi;	/* presumably the decomposition as UTF-32 -- TODO confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1345) 	unsigned int *utf32nfdicf;	/* presumably the case-folded variant of utf32nfdi -- TODO confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1346) 	char *utf8nfdi;	/* NUL-terminated string or NULL; compared by nfdi_equal() */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1347) 	char *utf8nfdicf;	/* presumably the case-folded counterpart of utf8nfdi -- TODO confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1348) };
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1349) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1350) struct unicode_data unicode_data[0x110000];	/* 0x110000 entries; presumably indexed by code point -- TODO confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1351) struct unicode_data *corrections;	/* array searched by corrections_lookup() */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1352) int    corrections_count;	/* number of entries in corrections[] */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1353) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1354) struct tree *nfdi_tree;	/* NOTE(review): presumably the tree used for nfdi lookups -- confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1355) struct tree *nfdicf_tree;	/* NOTE(review): presumably the tree used for nfdicf lookups -- confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1356) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1357) struct tree *trees;	/* presumably an array of all trees -- TODO confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1358) int          trees_count;	/* presumably the number of entries in trees[] -- TODO confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1359) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1360) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1361)  * Check the corrections array to see if this entry was corrected at
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1362)  * some point.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1363)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1364) static struct unicode_data *corrections_lookup(struct unicode_data *u)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1365) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1366) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1367) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1368) 	for (i = 0; i != corrections_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1369) 		if (u->code == corrections[i].code)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1370) 			return &corrections[i];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1371) 	return u;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1372) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1373) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1374) static int nfdi_equal(void *l, void *r)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1375) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1376) 	struct unicode_data *left = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1377) 	struct unicode_data *right = r;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1378) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1379) 	if (left->gen != right->gen)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1380) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1381) 	if (left->ccc != right->ccc)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1382) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1383) 	if (left->utf8nfdi && right->utf8nfdi &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1384) 	    strcmp(left->utf8nfdi, right->utf8nfdi) == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1385) 		return 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1386) 	if (left->utf8nfdi || right->utf8nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1387) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1388) 	return 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1389) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1390) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1391) static int nfdicf_equal(void *l, void *r)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1392) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1393) 	struct unicode_data *left = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1394) 	struct unicode_data *right = r;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1395) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1396) 	if (left->gen != right->gen)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1397) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1398) 	if (left->ccc != right->ccc)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1399) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1400) 	if (left->utf8nfdicf && right->utf8nfdicf &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1401) 	    strcmp(left->utf8nfdicf, right->utf8nfdicf) == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1402) 		return 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1403) 	if (left->utf8nfdicf && right->utf8nfdicf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1404) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1405) 	if (left->utf8nfdicf || right->utf8nfdicf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1406) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1407) 	if (left->utf8nfdi && right->utf8nfdi &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1408) 	    strcmp(left->utf8nfdi, right->utf8nfdi) == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1409) 		return 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1410) 	if (left->utf8nfdi || right->utf8nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1411) 		return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1412) 	return 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1413) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1414) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1415) static void nfdi_print(void *l, int indent)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1416) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1417) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1418) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1419) 	printf("%*sleaf @ %p code %X ccc %d gen %d", indent, "", leaf,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1420) 		leaf->code, leaf->ccc, leaf->gen);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1421) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1422) 	if (leaf->utf8nfdi && leaf->utf8nfdi[0] == HANGUL)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1423) 		printf(" nfdi \"%s\"", "HANGUL SYLLABLE");
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1424) 	else if (leaf->utf8nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1425) 		printf(" nfdi \"%s\"", (const char*)leaf->utf8nfdi);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1426) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1427) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1428) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1429) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1430) static void nfdicf_print(void *l, int indent)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1431) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1432) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1433) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1434) 	printf("%*sleaf @ %p code %X ccc %d gen %d", indent, "", leaf,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1435) 		leaf->code, leaf->ccc, leaf->gen);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1436) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1437) 	if (leaf->utf8nfdicf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1438) 		printf(" nfdicf \"%s\"", (const char*)leaf->utf8nfdicf);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1439) 	else if (leaf->utf8nfdi && leaf->utf8nfdi[0] == HANGUL)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1440) 		printf(" nfdi \"%s\"", "HANGUL SYLLABLE");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1441) 	else if (leaf->utf8nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1442) 		printf(" nfdi \"%s\"", (const char*)leaf->utf8nfdi);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1443) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1444) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1445) 
/*
 * leaf_mark callout that marks unconditionally.
 *
 * l: leaf being considered; not inspected.
 *
 * Always returns 1.
 */
static int nfdi_mark(void *l)
{
	(void)l;

	return 1;
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1450) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1451) static int nfdicf_mark(void *l)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1452) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1453) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1454) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1455) 	if (leaf->utf8nfdicf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1456) 		return 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1457) 	return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1458) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1459) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1460) static int correction_mark(void *l)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1461) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1462) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1463) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1464) 	return leaf->correction;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1465) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1466) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1467) static int nfdi_size(void *l)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1468) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1469) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1470) 	int size = 2;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1471) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1472) 	if (HANGUL_SYLLABLE(leaf->code))
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1473) 		size += 1;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1474) 	else if (leaf->utf8nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1475) 		size += strlen(leaf->utf8nfdi) + 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1476) 	return size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1477) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1478) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1479) static int nfdicf_size(void *l)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1480) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1481) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1482) 	int size = 2;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1483) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1484) 	if (HANGUL_SYLLABLE(leaf->code))
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1485) 		size += 1;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1486) 	else if (leaf->utf8nfdicf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1487) 		size += strlen(leaf->utf8nfdicf) + 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1488) 	else if (leaf->utf8nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1489) 		size += strlen(leaf->utf8nfdi) + 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1490) 	return size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1491) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1492) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1493) static int *nfdi_index(struct tree *tree, void *l)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1494) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1495) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1496) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1497) 	return &tree->leafindex[leaf->code];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1498) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1499) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1500) static int *nfdicf_index(struct tree *tree, void *l)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1501) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1502) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1503) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1504) 	return &tree->leafindex[leaf->code];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1505) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1506) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1507) static unsigned char *nfdi_emit(void *l, unsigned char *data)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1508) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1509) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1510) 	unsigned char *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1511) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1512) 	*data++ = leaf->gen;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1513) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1514) 	if (HANGUL_SYLLABLE(leaf->code)) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1515) 		*data++ = DECOMPOSE;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1516) 		*data++ = HANGUL;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1517) 	} else if (leaf->utf8nfdi) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1518) 		*data++ = DECOMPOSE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1519) 		s = (unsigned char*)leaf->utf8nfdi;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1520) 		while ((*data++ = *s++) != 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1521) 			;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1522) 	} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1523) 		*data++ = leaf->ccc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1524) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1525) 	return data;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1526) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1527) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1528) static unsigned char *nfdicf_emit(void *l, unsigned char *data)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1529) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1530) 	struct unicode_data *leaf = l;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1531) 	unsigned char *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1532) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1533) 	*data++ = leaf->gen;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1534) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1535) 	if (HANGUL_SYLLABLE(leaf->code)) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1536) 		*data++ = DECOMPOSE;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1537) 		*data++ = HANGUL;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1538) 	} else if (leaf->utf8nfdicf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1539) 		*data++ = DECOMPOSE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1540) 		s = (unsigned char*)leaf->utf8nfdicf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1541) 		while ((*data++ = *s++) != 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1542) 			;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1543) 	} else if (leaf->utf8nfdi) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1544) 		*data++ = DECOMPOSE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1545) 		s = (unsigned char*)leaf->utf8nfdi;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1546) 		while ((*data++ = *s++) != 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1547) 			;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1548) 	} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1549) 		*data++ = leaf->ccc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1550) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1551) 	return data;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1552) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1553) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1554) static void utf8_create(struct unicode_data *data)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1555) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1556) 	char utf[18*4+1];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1557) 	char *u;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1558) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1559) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1560) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1561) 	if (data->utf8nfdi) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1562) 		assert(data->utf8nfdi[0] == HANGUL);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1563) 		return;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1564) 	}
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1565) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1566) 	u = utf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1567) 	um = data->utf32nfdi;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1568) 	if (um) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1569) 		for (i = 0; um[i]; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1570) 			u += utf8encode(u, um[i]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1571) 		*u = '\0';
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1572) 		data->utf8nfdi = strdup(utf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1573) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1574) 	u = utf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1575) 	um = data->utf32nfdicf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1576) 	if (um) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1577) 		for (i = 0; um[i]; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1578) 			u += utf8encode(u, um[i]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1579) 		*u = '\0';
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1580) 		if (!data->utf8nfdi || strcmp(data->utf8nfdi, utf))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1581) 			data->utf8nfdicf = strdup(utf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1582) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1583) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1584) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1585) static void utf8_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1586) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1587) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1588) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1589) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1590) 	for (unichar = 0; unichar != 0x110000; unichar++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1591) 		utf8_create(&unicode_data[unichar]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1592) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1593) 	for (i = 0; i != corrections_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1594) 		utf8_create(&corrections[i]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1595) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1596) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1597) static void trees_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1598) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1599) 	struct unicode_data *data;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1600) 	unsigned int maxage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1601) 	unsigned int nextage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1602) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1603) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1604) 	int j;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1605) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1606) 	/* Count the number of different ages. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1607) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1608) 	nextage = (unsigned int)-1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1609) 	do {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1610) 		maxage = nextage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1611) 		nextage = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1612) 		for (i = 0; i <= corrections_count; i++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1613) 			data = &corrections[i];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1614) 			if (nextage < data->correction &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1615) 			    data->correction < maxage)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1616) 				nextage = data->correction;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1617) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1618) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1619) 	} while (nextage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1620) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1621) 	/* Two trees per age: nfdi and nfdicf */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1622) 	trees_count = count * 2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1623) 	trees = calloc(trees_count, sizeof(struct tree));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1624) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1625) 	/* Assign ages to the trees. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1626) 	count = trees_count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1627) 	nextage = (unsigned int)-1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1628) 	do {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1629) 		maxage = nextage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1630) 		trees[--count].maxage = maxage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1631) 		trees[--count].maxage = maxage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1632) 		nextage = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1633) 		for (i = 0; i <= corrections_count; i++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1634) 			data = &corrections[i];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1635) 			if (nextage < data->correction &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1636) 			    data->correction < maxage)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1637) 				nextage = data->correction;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1638) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1639) 	} while (nextage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1640) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1641) 	/* The ages assigned above are off by one. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1642) 	for (i = 0; i != trees_count; i++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1643) 		j = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1644) 		while (ages[j] < trees[i].maxage)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1645) 			j++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1646) 		trees[i].maxage = ages[j-1];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1647) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1648) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1649) 	/* Set up the forwarding between trees. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1650) 	trees[trees_count-2].next = &trees[trees_count-1];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1651) 	trees[trees_count-1].leaf_mark = nfdi_mark;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1652) 	trees[trees_count-2].leaf_mark = nfdicf_mark;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1653) 	for (i = 0; i != trees_count-2; i += 2) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1654) 		trees[i].next = &trees[trees_count-2];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1655) 		trees[i].leaf_mark = correction_mark;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1656) 		trees[i+1].next = &trees[trees_count-1];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1657) 		trees[i+1].leaf_mark = correction_mark;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1658) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1659) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1660) 	/* Assign the callouts. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1661) 	for (i = 0; i != trees_count; i += 2) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1662) 		trees[i].type = "nfdicf";
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1663) 		trees[i].leaf_equal = nfdicf_equal;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1664) 		trees[i].leaf_print = nfdicf_print;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1665) 		trees[i].leaf_size = nfdicf_size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1666) 		trees[i].leaf_index = nfdicf_index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1667) 		trees[i].leaf_emit = nfdicf_emit;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1668) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1669) 		trees[i+1].type = "nfdi";
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1670) 		trees[i+1].leaf_equal = nfdi_equal;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1671) 		trees[i+1].leaf_print = nfdi_print;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1672) 		trees[i+1].leaf_size = nfdi_size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1673) 		trees[i+1].leaf_index = nfdi_index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1674) 		trees[i+1].leaf_emit = nfdi_emit;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1675) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1676) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1677) 	/* Finish init. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1678) 	for (i = 0; i != trees_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1679) 		trees[i].childnode = NODE;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1680) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1681) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1682) static void trees_populate(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1683) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1684) 	struct unicode_data *data;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1685) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1686) 	char keyval[4];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1687) 	int keylen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1688) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1689) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1690) 	for (i = 0; i != trees_count; i++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1691) 		if (verbose > 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1692) 			printf("Populating %s_%x\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1693) 				trees[i].type, trees[i].maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1694) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1695) 		for (unichar = 0; unichar != 0x110000; unichar++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1696) 			if (unicode_data[unichar].gen < 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1697) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1698) 			keylen = utf8encode(keyval, unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1699) 			data = corrections_lookup(&unicode_data[unichar]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1700) 			if (data->correction <= trees[i].maxage)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1701) 				data = &unicode_data[unichar];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1702) 			insert(&trees[i], keyval, keylen, data);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1703) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1704) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1705) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1706) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1707) static void trees_reduce(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1708) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1709) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1710) 	int size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1711) 	int changed;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1712) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1713) 	for (i = 0; i != trees_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1714) 		prune(&trees[i]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1715) 	for (i = 0; i != trees_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1716) 		mark_nodes(&trees[i]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1717) 	do {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1718) 		size = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1719) 		for (i = 0; i != trees_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1720) 			size = index_nodes(&trees[i], size);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1721) 		changed = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1722) 		for (i = 0; i != trees_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1723) 			changed += size_nodes(&trees[i]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1724) 	} while (changed);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1725) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1726) 	utf8data = calloc(size, 1);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1727) 	utf8data_size = size;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1728) 	for (i = 0; i != trees_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1729) 		emit(&trees[i], utf8data);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1730) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1731) 	if (verbose > 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1732) 		for (i = 0; i != trees_count; i++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1733) 			printf("%s_%x idx %d\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1734) 				trees[i].type, trees[i].maxage, trees[i].index);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1735) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1736) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1737) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1738) 	nfdi = utf8data + trees[trees_count-1].index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1739) 	nfdicf = utf8data + trees[trees_count-2].index;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1740) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1741) 	nfdi_tree = &trees[trees_count-1];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1742) 	nfdicf_tree = &trees[trees_count-2];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1743) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1744) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1745) static void verify(struct tree *tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1746) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1747) 	struct unicode_data *data;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1748) 	utf8leaf_t	*leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1749) 	unsigned int	unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1750) 	char		key[4];
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1751) 	unsigned char	hangul[UTF8HANGULLEAF];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1752) 	int		report;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1753) 	int		nocf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1754) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1755) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1756) 		printf("Verifying %s_%x\n", tree->type, tree->maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1757) 	nocf = strcmp(tree->type, "nfdicf");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1758) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1759) 	for (unichar = 0; unichar != 0x110000; unichar++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1760) 		report = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1761) 		data = corrections_lookup(&unicode_data[unichar]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1762) 		if (data->correction <= tree->maxage)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1763) 			data = &unicode_data[unichar];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1764) 		utf8encode(key,unichar);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1765) 		leaf = utf8lookup(tree, hangul, key);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1766) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1767) 		if (!leaf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1768) 			if (data->gen != -1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1769) 				report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1770) 			if (unichar < 0xd800 || unichar > 0xdfff)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1771) 				report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1772) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1773) 			if (unichar >= 0xd800 && unichar <= 0xdfff)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1774) 				report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1775) 			if (data->gen == -1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1776) 				report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1777) 			if (data->gen != LEAF_GEN(leaf))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1778) 				report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1779) 			if (LEAF_CCC(leaf) == DECOMPOSE) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1780) 				if (HANGUL_SYLLABLE(data->code)) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1781) 					if (data->utf8nfdi[0] != HANGUL)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1782) 						report++;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 1783) 				} else if (nocf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1784) 					if (!data->utf8nfdi) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1785) 						report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1786) 					} else if (strcmp(data->utf8nfdi,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1787) 							  LEAF_STR(leaf))) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1788) 						report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1789) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1790) 				} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1791) 					if (!data->utf8nfdicf &&
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1792) 					    !data->utf8nfdi) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1793) 						report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1794) 					} else if (data->utf8nfdicf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1795) 						if (strcmp(data->utf8nfdicf,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1796) 							   LEAF_STR(leaf)))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1797) 							report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1798) 					} else if (strcmp(data->utf8nfdi,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1799) 							  LEAF_STR(leaf))) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1800) 						report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1801) 					}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1802) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1803) 			} else if (data->ccc != LEAF_CCC(leaf)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1804) 				report++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1805) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1806) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1807) 		if (report) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1808) 			printf("%X code %X gen %d ccc %d"
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1809) 				" nfdi -> \"%s\"",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1810) 				unichar, data->code, data->gen,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1811) 				data->ccc,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1812) 				data->utf8nfdi);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1813) 			if (leaf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1814) 				printf(" gen %d ccc %d"
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1815) 					" nfdi -> \"%s\"",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1816) 					LEAF_GEN(leaf),
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1817) 					LEAF_CCC(leaf),
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1818) 					LEAF_CCC(leaf) == DECOMPOSE ?
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1819) 						LEAF_STR(leaf) : "");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1820) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1821) 			printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1822) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1823) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1824) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1825) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1826) static void trees_verify(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1827) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1828) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1829) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1830) 	for (i = 0; i != trees_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1831) 		verify(&trees[i]);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1832) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1833) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1834) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1835) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1836) static void help(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1837) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1838) 	printf("Usage: %s [options]\n", argv0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1839) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1840) 	printf("This program creates an a data trie used for parsing and\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1841) 	printf("normalization of UTF-8 strings. The trie is derived from\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1842) 	printf("a set of input files from the Unicode character database\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1843) 	printf("found at: http://www.unicode.org/Public/UCD/latest/ucd/\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1844) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1845) 	printf("The generated tree supports two normalization forms:\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1846) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1847) 	printf("\tnfdi:\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1848) 	printf("\t- Apply unicode normalization form NFD.\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1849) 	printf("\t- Remove any Default_Ignorable_Code_Point.\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1850) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1851) 	printf("\tnfdicf:\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1852) 	printf("\t- Apply unicode normalization form NFD.\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1853) 	printf("\t- Remove any Default_Ignorable_Code_Point.\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1854) 	printf("\t- Apply a full casefold (C + F).\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1855) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1856) 	printf("These forms were chosen as being most useful when dealing\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1857) 	printf("with file names: NFD catches most cases where characters\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1858) 	printf("should be considered equivalent. The ignorables are mostly\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1859) 	printf("invisible, making names hard to type.\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1860) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1861) 	printf("The options to specify the files to be used are listed\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1862) 	printf("below with their default values, which are the names used\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1863) 	printf("by version 11.0.0 of the Unicode Character Database.\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1864) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1865) 	printf("The input files:\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1866) 	printf("\t-a %s\n", AGE_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1867) 	printf("\t-c %s\n", CCC_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1868) 	printf("\t-p %s\n", PROP_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1869) 	printf("\t-d %s\n", DATA_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1870) 	printf("\t-f %s\n", FOLD_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1871) 	printf("\t-n %s\n", NORM_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1872) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1873) 	printf("Additionally, the generated tables are tested using:\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1874) 	printf("\t-t %s\n", TEST_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1875) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1876) 	printf("Finally, the output file:\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1877) 	printf("\t-o %s\n", UTF8_NAME);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1878) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1879) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1880) 
/* Print the help text, then terminate the program with exit status 1. */
static void usage(void)
{
	help();

	exit(1);
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1886) 
/*
 * Report a failure to open a file and terminate with exit status 1.
 *
 * @name:  name of the file that could not be opened.
 * @error: error number of the failed open; printed both numerically
 *         and as the strerror() text.
 */
static void open_fail(const char *name, int error)
{
	const char *reason = strerror(error);

	printf("Error %d opening %s: %s\n", error, name, reason);
	exit(1);
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1892) 
/*
 * Report a parse failure for a whole file and terminate with exit
 * status 1.
 *
 * @filename: name of the file being parsed.
 */
static void file_fail(const char *filename)
{
	printf("Error parsing %s\n",
	       filename);

	exit(1);
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1898) 
/*
 * Report a parse failure for one line of a file and terminate with
 * exit status 1.
 *
 * @filename: name of the file being parsed.
 * @line:     text of the offending line, printed verbatim.
 */
static void line_fail(const char *filename, const char *line)
{
	printf("Error parsing %s:%s\n",
	       filename, line);

	exit(1);
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1904) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1905) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1906) 
/*
 * Print each element of a zero-terminated array of code points to
 * stdout as upper-case hex, each preceded by a single space.  The
 * terminating zero itself is not printed.
 */
static void print_utf32(unsigned int *utf32str)
{
	unsigned int *cursor = utf32str;

	while (*cursor) {
		printf(" %X", *cursor);
		cursor++;
	}
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1914) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1915) static void print_utf32nfdi(unsigned int unichar)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1916) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1917) 	printf(" %X ->", unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1918) 	print_utf32(unicode_data[unichar].utf32nfdi);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1919) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1920) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1921) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1922) static void print_utf32nfdicf(unsigned int unichar)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1923) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1924) 	printf(" %X ->", unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1925) 	print_utf32(unicode_data[unichar].utf32nfdicf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1926) 	printf("\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1927) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1928) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1929) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1930) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1931) static void age_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1932) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1933) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1934) 	unsigned int first;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1935) 	unsigned int last;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1936) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1937) 	unsigned int major;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1938) 	unsigned int minor;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1939) 	unsigned int revision;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1940) 	int gen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1941) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1942) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1943) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1944) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1945) 		printf("Parsing %s\n", age_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1946) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1947) 	file = fopen(age_name, "r");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1948) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1949) 		open_fail(age_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1950) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1951) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1952) 	gen = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1953) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1954) 		ret = sscanf(line, "# Age=V%d_%d_%d",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1955) 				&major, &minor, &revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1956) 		if (ret == 3) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1957) 			ages_count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1958) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1959) 				printf(" Age V%d_%d_%d\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1960) 					major, minor, revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1961) 			if (!age_valid(major, minor, revision))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1962) 				line_fail(age_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1963) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1964) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1965) 		ret = sscanf(line, "# Age=V%d_%d", &major, &minor);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1966) 		if (ret == 2) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1967) 			ages_count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1968) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1969) 				printf(" Age V%d_%d\n", major, minor);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1970) 			if (!age_valid(major, minor, 0))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1971) 				line_fail(age_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1972) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1973) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1974) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1975) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1976) 	/* We must have found something above. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1977) 	if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1978) 		printf("%d age entries\n", ages_count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1979) 	if (ages_count == 0 || ages_count > MAXGEN)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1980) 		file_fail(age_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1981) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1982) 	/* There is a 0 entry. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1983) 	ages_count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1984) 	ages = calloc(ages_count + 1, sizeof(*ages));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1985) 	/* And a guard entry. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1986) 	ages[ages_count] = (unsigned int)-1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1987) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1988) 	rewind(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1989) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1990) 	gen = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1991) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1992) 		ret = sscanf(line, "# Age=V%d_%d_%d",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1993) 				&major, &minor, &revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1994) 		if (ret == 3) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1995) 			ages[++gen] =
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1996) 				UNICODE_AGE(major, minor, revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1997) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1998) 				printf(" Age V%d_%d_%d = gen %d\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 1999) 					major, minor, revision, gen);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2000) 			if (!age_valid(major, minor, revision))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2001) 				line_fail(age_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2002) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2003) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2004) 		ret = sscanf(line, "# Age=V%d_%d", &major, &minor);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2005) 		if (ret == 2) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2006) 			ages[++gen] = UNICODE_AGE(major, minor, 0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2007) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2008) 				printf(" Age V%d_%d = %d\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2009) 					major, minor, gen);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2010) 			if (!age_valid(major, minor, 0))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2011) 				line_fail(age_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2012) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2013) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2014) 		ret = sscanf(line, "%X..%X ; %d.%d #",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2015) 			     &first, &last, &major, &minor);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2016) 		if (ret == 4) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2017) 			for (unichar = first; unichar <= last; unichar++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2018) 				unicode_data[unichar].gen = gen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2019) 			count += 1 + last - first;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2020) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2021) 				printf("  %X..%X gen %d\n", first, last, gen);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2022) 			if (!utf32valid(first) || !utf32valid(last))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2023) 				line_fail(age_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2024) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2025) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2026) 		ret = sscanf(line, "%X ; %d.%d #", &unichar, &major, &minor);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2027) 		if (ret == 3) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2028) 			unicode_data[unichar].gen = gen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2029) 			count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2030) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2031) 				printf("  %X gen %d\n", unichar, gen);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2032) 			if (!utf32valid(unichar))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2033) 				line_fail(age_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2034) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2035) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2036) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2037) 	unicode_maxage = ages[gen];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2038) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2039) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2040) 	/* Nix surrogate block */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2041) 	if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2042) 		printf(" Removing surrogate block D800..DFFF\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2043) 	for (unichar = 0xd800; unichar <= 0xdfff; unichar++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2044) 		unicode_data[unichar].gen = -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2045) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2046) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2047) 	        printf("Found %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2048) 	if (count == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2049) 		file_fail(age_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2050) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2051) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2052) static void ccc_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2053) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2054) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2055) 	unsigned int first;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2056) 	unsigned int last;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2057) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2058) 	unsigned int value;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2059) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2060) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2061) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2062) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2063) 		printf("Parsing %s\n", ccc_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2064) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2065) 	file = fopen(ccc_name, "r");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2066) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2067) 		open_fail(ccc_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2068) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2069) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2070) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2071) 		ret = sscanf(line, "%X..%X ; %d #", &first, &last, &value);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2072) 		if (ret == 3) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2073) 			for (unichar = first; unichar <= last; unichar++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2074) 				unicode_data[unichar].ccc = value;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2075)                                 count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2076) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2077) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2078) 				printf(" %X..%X ccc %d\n", first, last, value);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2079) 			if (!utf32valid(first) || !utf32valid(last))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2080) 				line_fail(ccc_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2081) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2082) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2083) 		ret = sscanf(line, "%X ; %d #", &unichar, &value);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2084) 		if (ret == 2) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2085) 			unicode_data[unichar].ccc = value;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2086)                         count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2087) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2088) 				printf(" %X ccc %d\n", unichar, value);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2089) 			if (!utf32valid(unichar))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2090) 				line_fail(ccc_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2091) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2092) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2093) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2094) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2095) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2096) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2097) 		printf("Found %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2098) 	if (count == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2099) 		file_fail(ccc_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2100) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2101) 
/*
 * Tell whether a decomposition tag is one of the compatibility forms
 * that are to be ignored.
 *
 * @type: NUL-terminated tag text, without the surrounding '<' and '>'.
 *
 * Returns 1 when @type is exactly (case-sensitively) equal to one of the
 * listed tags, 0 otherwise.
 */
static int ignore_compatibility_form(const char *type)
{
	/* Read-only, built once; the NULL entry terminates the scan. */
	static const char *const ignored_types[] = {
		"font", "noBreak", "initial", "medial",
		"final", "isolated", "circle", "super",
		"sub", "vertical", "wide", "narrow",
		"small", "square", "fraction", "compat",
		NULL
	};
	const char *const *entry;

	for (entry = ignored_types; *entry; entry++)
		if (strcmp(type, *entry) == 0)
			return 1;
	return 0;
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2115) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2116) static void nfdi_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2117) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2118) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2119) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2120) 	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2121) 	char *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2122) 	char *type;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2123) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2124) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2125) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2126) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2127) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2128) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2129) 		printf("Parsing %s\n", data_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2130) 	file = fopen(data_name, "r");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2131) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2132) 		open_fail(data_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2133) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2134) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2135) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2136) 		ret = sscanf(line, "%X;%*[^;];%*[^;];%*[^;];%*[^;];%[^;];",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2137) 			     &unichar, buf0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2138) 		if (ret != 2)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2139) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2140) 		if (!utf32valid(unichar))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2141) 			line_fail(data_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2142) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2143) 		s = buf0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2144) 		/* skip over <tag> */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2145) 		if (*s == '<') {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2146) 			type = ++s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2147) 			while (*++s != '>');
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2148) 			*s++ = '\0';
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2149) 			if(ignore_compatibility_form(type))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2150) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2151) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2152) 		/* decode the decomposition into UTF-32 */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2153) 		i = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2154) 		while (*s) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2155) 			mapping[i] = strtoul(s, &s, 16);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2156) 			if (!utf32valid(mapping[i]))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2157) 				line_fail(data_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2158) 			i++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2159) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2160) 		mapping[i++] = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2161) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2162) 		um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2163) 		memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2164) 		unicode_data[unichar].utf32nfdi = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2165) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2166) 		if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2167) 			print_utf32nfdi(unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2168) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2169) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2170) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2171) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2172) 		printf("Found %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2173) 	if (count == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2174) 		file_fail(data_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2175) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2176) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2177) static void nfdicf_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2178) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2179) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2180) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2181) 	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2182) 	char status;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2183) 	char *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2184) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2185) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2186) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2187) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2188) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2189) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2190) 		printf("Parsing %s\n", fold_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2191) 	file = fopen(fold_name, "r");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2192) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2193) 		open_fail(fold_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2194) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2195) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2196) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2197) 		ret = sscanf(line, "%X; %c; %[^;];", &unichar, &status, buf0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2198) 		if (ret != 3)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2199) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2200) 		if (!utf32valid(unichar))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2201) 			line_fail(fold_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2202) 		/* Use the C+F casefold. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2203) 		if (status != 'C' && status != 'F')
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2204) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2205) 		s = buf0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2206) 		if (*s == '<')
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2207) 			while (*s++ != ' ')
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2208) 				;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2209) 		i = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2210) 		while (*s) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2211) 			mapping[i] = strtoul(s, &s, 16);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2212) 			if (!utf32valid(mapping[i]))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2213) 				line_fail(fold_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2214) 			i++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2215) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2216) 		mapping[i++] = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2217) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2218) 		um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2219) 		memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2220) 		unicode_data[unichar].utf32nfdicf = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2221) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2222) 		if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2223) 			print_utf32nfdicf(unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2224) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2225) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2226) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2227) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2228) 		printf("Found %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2229) 	if (count == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2230) 		file_fail(fold_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2231) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2232) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2233) static void ignore_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2234) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2235) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2236) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2237) 	unsigned int first;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2238) 	unsigned int last;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2239) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2240) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2241) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2242) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2243) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2244) 		printf("Parsing %s\n", prop_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2245) 	file = fopen(prop_name, "r");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2246) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2247) 		open_fail(prop_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2248) 	assert(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2249) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2250) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2251) 		ret = sscanf(line, "%X..%X ; %s # ", &first, &last, buf0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2252) 		if (ret == 3) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2253) 			if (strcmp(buf0, "Default_Ignorable_Code_Point"))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2254) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2255) 			if (!utf32valid(first) || !utf32valid(last))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2256) 				line_fail(prop_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2257) 			for (unichar = first; unichar <= last; unichar++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2258) 				free(unicode_data[unichar].utf32nfdi);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2259) 				um = malloc(sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2260) 				*um = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2261) 				unicode_data[unichar].utf32nfdi = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2262) 				free(unicode_data[unichar].utf32nfdicf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2263) 				um = malloc(sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2264) 				*um = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2265) 				unicode_data[unichar].utf32nfdicf = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2266) 				count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2267) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2268) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2269) 				printf(" %X..%X Default_Ignorable_Code_Point\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2270) 					first, last);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2271) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2272) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2273) 		ret = sscanf(line, "%X ; %s # ", &unichar, buf0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2274) 		if (ret == 2) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2275) 			if (strcmp(buf0, "Default_Ignorable_Code_Point"))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2276) 				continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2277) 			if (!utf32valid(unichar))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2278) 				line_fail(prop_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2279) 			free(unicode_data[unichar].utf32nfdi);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2280) 			um = malloc(sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2281) 			*um = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2282) 			unicode_data[unichar].utf32nfdi = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2283) 			free(unicode_data[unichar].utf32nfdicf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2284) 			um = malloc(sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2285) 			*um = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2286) 			unicode_data[unichar].utf32nfdicf = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2287) 			if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2288) 				printf(" %X Default_Ignorable_Code_Point\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2289) 					unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2290) 			count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2291) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2292) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2293) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2294) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2295) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2296) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2297) 		printf("Found %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2298) 	if (count == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2299) 		file_fail(prop_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2300) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2301) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2302) static void corrections_init(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2303) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2304) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2305) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2306) 	unsigned int major;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2307) 	unsigned int minor;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2308) 	unsigned int revision;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2309) 	unsigned int age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2310) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2311) 	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2312) 	char *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2313) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2314) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2315) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2316) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2317) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2318) 		printf("Parsing %s\n", norm_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2319) 	file = fopen(norm_name, "r");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2320) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2321) 		open_fail(norm_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2322) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2323) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2324) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2325) 		ret = sscanf(line, "%X;%[^;];%[^;];%d.%d.%d #",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2326) 				&unichar, buf0, buf1,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2327) 				&major, &minor, &revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2328) 		if (ret != 6)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2329) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2330) 		if (!utf32valid(unichar) || !age_valid(major, minor, revision))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2331) 			line_fail(norm_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2332) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2333) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2334) 	corrections = calloc(count, sizeof(struct unicode_data));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2335) 	corrections_count = count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2336) 	rewind(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2337) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2338) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2339) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2340) 		ret = sscanf(line, "%X;%[^;];%[^;];%d.%d.%d #",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2341) 				&unichar, buf0, buf1,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2342) 				&major, &minor, &revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2343) 		if (ret != 6)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2344) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2345) 		if (!utf32valid(unichar) || !age_valid(major, minor, revision))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2346) 			line_fail(norm_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2347) 		corrections[count] = unicode_data[unichar];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2348) 		assert(corrections[count].code == unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2349) 		age = UNICODE_AGE(major, minor, revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2350) 		corrections[count].correction = age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2351) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2352) 		i = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2353) 		s = buf0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2354) 		while (*s) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2355) 			mapping[i] = strtoul(s, &s, 16);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2356) 			if (!utf32valid(mapping[i]))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2357) 				line_fail(norm_name, line);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2358) 			i++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2359) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2360) 		mapping[i++] = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2361) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2362) 		um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2363) 		memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2364) 		corrections[count].utf32nfdi = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2365) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2366) 		if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2367) 			printf(" %X -> %s -> %s V%d_%d_%d\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2368) 				unichar, buf0, buf1, major, minor, revision);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2369) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2370) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2371) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2372) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2373) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2374) 	        printf("Found %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2375) 	if (count == 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2376) 		file_fail(norm_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2377) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2378) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2379) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2380) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2381) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2382)  * Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2383)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2384)  * AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2385)  * D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2386)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2387)  * SBase = 0xAC00
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2388)  * LBase = 0x1100
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2389)  * VBase = 0x1161
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2390)  * TBase = 0x11A7
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2391)  * LCount = 19
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2392)  * VCount = 21
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2393)  * TCount = 28
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2394)  * NCount = 588 (VCount * TCount)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2395)  * SCount = 11172 (LCount * NCount)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2396)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2397)  * Decomposition:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2398)  *   SIndex = s - SBase
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2399)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2400)  * LV (Canonical/Full)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2401)  *   LIndex = SIndex / NCount
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2402)  *   VIndex = (Sindex % NCount) / TCount
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2403)  *   LPart = LBase + LIndex
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2404)  *   VPart = VBase + VIndex
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2405)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2406)  * LVT (Canonical)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2407)  *   LVIndex = (SIndex / TCount) * TCount
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2408)  *   TIndex = (Sindex % TCount)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2409)  *   LVPart = SBase + LVIndex
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2410)  *   TPart = TBase + TIndex
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2411)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2412)  * LVT (Full)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2413)  *   LIndex = SIndex / NCount
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2414)  *   VIndex = (Sindex % NCount) / TCount
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2415)  *   TIndex = (Sindex % TCount)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2416)  *   LPart = LBase + LIndex
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2417)  *   VPart = VBase + VIndex
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2418)  *   if (TIndex == 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2419)  *          d = <LPart, VPart>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2420)  *   } else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2421)  *          TPart = TBase + TIndex
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2422)  *          d = <LPart, VPart, TPart>
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2423)  *   }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2424)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2425)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2426) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2427) static void hangul_decompose(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2428) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2429) 	unsigned int sb = 0xAC00;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2430) 	unsigned int lb = 0x1100;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2431) 	unsigned int vb = 0x1161;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2432) 	unsigned int tb = 0x11a7;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2433) 	/* unsigned int lc = 19; */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2434) 	unsigned int vc = 21;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2435) 	unsigned int tc = 28;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2436) 	unsigned int nc = (vc * tc);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2437) 	/* unsigned int sc = (lc * nc); */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2438) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2439) 	unsigned int mapping[4];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2440) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2441)         int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2442) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2443) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2444) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2445) 		printf("Decomposing hangul\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2446) 	/* Hangul */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2447) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2448) 	for (unichar = 0xAC00; unichar <= 0xD7A3; unichar++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2449) 		unsigned int si = unichar - sb;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2450) 		unsigned int li = si / nc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2451) 		unsigned int vi = (si % nc) / tc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2452) 		unsigned int ti = si % tc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2453) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2454) 		i = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2455) 		mapping[i++] = lb + li;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2456) 		mapping[i++] = vb + vi;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2457) 		if (ti)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2458) 			mapping[i++] = tb + ti;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2459) 		mapping[i++] = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2460) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2461) 		assert(!unicode_data[unichar].utf32nfdi);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2462) 		um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2463) 		memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2464) 		unicode_data[unichar].utf32nfdi = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2465) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2466) 		assert(!unicode_data[unichar].utf32nfdicf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2467) 		um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2468) 		memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2469) 		unicode_data[unichar].utf32nfdicf = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2470) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2471) 		/*
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2472) 		 * Add a cookie as a reminder that the hangul syllable
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2473) 		 * decompositions must not be stored in the generated
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2474) 		 * trie.
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2475) 		 */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2476) 		unicode_data[unichar].utf8nfdi = malloc(2);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2477) 		unicode_data[unichar].utf8nfdi[0] = HANGUL;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2478) 		unicode_data[unichar].utf8nfdi[1] = '\0';
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2479) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2480) 		if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2481) 			print_utf32nfdi(unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2482) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2483) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2484) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2485) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2486) 		printf("Created %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2487) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2488) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2489) static void nfdi_decompose(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2490) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2491) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2492) 	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2493) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2494) 	unsigned int *dc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2495) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2496) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2497) 	int j;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2498) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2499) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2500) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2501) 		printf("Decomposing nfdi\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2502) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2503) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2504) 	for (unichar = 0; unichar != 0x110000; unichar++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2505) 		if (!unicode_data[unichar].utf32nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2506) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2507) 		for (;;) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2508) 			ret = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2509) 			i = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2510) 			um = unicode_data[unichar].utf32nfdi;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2511) 			while (*um) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2512) 				dc = unicode_data[*um].utf32nfdi;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2513) 				if (dc) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2514) 					for (j = 0; dc[j]; j++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2515) 						mapping[i++] = dc[j];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2516) 					ret = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2517) 				} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2518) 					mapping[i++] = *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2519) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2520) 				um++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2521) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2522) 			mapping[i++] = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2523) 			if (ret)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2524) 				break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2525) 			free(unicode_data[unichar].utf32nfdi);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2526) 			um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2527) 			memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2528) 			unicode_data[unichar].utf32nfdi = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2529) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2530) 		/* Add this decomposition to nfdicf if there is no entry. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2531) 		if (!unicode_data[unichar].utf32nfdicf) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2532) 			um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2533) 			memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2534) 			unicode_data[unichar].utf32nfdicf = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2535) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2536) 		if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2537) 			print_utf32nfdi(unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2538) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2539) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2540) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2541) 		printf("Processed %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2542) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2543) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2544) static void nfdicf_decompose(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2545) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2546) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2547) 	unsigned int mapping[19]; /* Magic - guaranteed not to be exceeded. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2548) 	unsigned int *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2549) 	unsigned int *dc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2550) 	int count;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2551) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2552) 	int j;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2553) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2554) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2555) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2556) 		printf("Decomposing nfdicf\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2557) 	count = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2558) 	for (unichar = 0; unichar != 0x110000; unichar++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2559) 		if (!unicode_data[unichar].utf32nfdicf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2560) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2561) 		for (;;) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2562) 			ret = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2563) 			i = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2564) 			um = unicode_data[unichar].utf32nfdicf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2565) 			while (*um) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2566) 				dc = unicode_data[*um].utf32nfdicf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2567) 				if (dc) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2568) 					for (j = 0; dc[j]; j++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2569) 						mapping[i++] = dc[j];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2570) 					ret = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2571) 				} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2572) 					mapping[i++] = *um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2573) 				}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2574) 				um++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2575) 			}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2576) 			mapping[i++] = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2577) 			if (ret)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2578) 				break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2579) 			free(unicode_data[unichar].utf32nfdicf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2580) 			um = malloc(i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2581) 			memcpy(um, mapping, i * sizeof(unsigned int));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2582) 			unicode_data[unichar].utf32nfdicf = um;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2583) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2584) 		if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2585) 			print_utf32nfdicf(unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2586) 		count++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2587) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2588) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2589) 		printf("Processed %d entries\n", count);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2590) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2591) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2592) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2593) 
/*
 * Prototypes for the UTF-8 helpers.  The "n" variants take an additional
 * size_t argument alongside the string.
 */
struct utf8cursor;

int utf8agemax(struct tree *tree, const char *s);
int utf8nagemax(struct tree *tree, const char *s, size_t len);
int utf8agemin(struct tree *tree, const char *s);
int utf8nagemin(struct tree *tree, const char *s, size_t len);
ssize_t utf8len(struct tree *tree, const char *s);
ssize_t utf8nlen(struct tree *tree, const char *s, size_t len);
int utf8cursor(struct utf8cursor *u8c, struct tree *tree, const char *s);
int utf8ncursor(struct utf8cursor *u8c, struct tree *tree, const char *s,
		size_t len);
int utf8byte(struct utf8cursor *u8c);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2604) 
/*
 * Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
 *
 * AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
 * D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
 *
 * SBase = 0xAC00
 * LBase = 0x1100
 * VBase = 0x1161
 * TBase = 0x11A7
 * LCount = 19
 * VCount = 21
 * TCount = 28
 * NCount = 588 (VCount * TCount)
 * SCount = 11172 (LCount * NCount)
 *
 * Decomposition:
 *   SIndex = s - SBase
 *
 * LV (Canonical/Full)
 *   LIndex = SIndex / NCount
 *   VIndex = (SIndex % NCount) / TCount
 *   LPart = LBase + LIndex
 *   VPart = VBase + VIndex
 *
 * LVT (Canonical)
 *   LVIndex = (SIndex / TCount) * TCount
 *   TIndex = (SIndex % TCount)
 *   LVPart = SBase + LVIndex
 *   TPart = TBase + TIndex
 *
 * LVT (Full)
 *   LIndex = SIndex / NCount
 *   VIndex = (SIndex % NCount) / TCount
 *   TIndex = (SIndex % TCount)
 *   LPart = LBase + LIndex
 *   VPart = VBase + VIndex
 *   if (TIndex == 0) {
 *          d = <LPart, VPart>
 *   } else {
 *          TPart = TBase + TIndex
 *          d = <LPart, VPart, TPart>
 *   }
 */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2649) 
/*
 * Constants for the algorithmic hangul decomposition: code point bases
 * (xB) and counts (xC) for syllables (S) and the L, V and T parts.
 */
#define SB	(0xAC00)	/* SBase: first hangul syllable */
#define LB	(0x1100)	/* LBase: added to the L index */
#define VB	(0x1161)	/* VBase: added to the V index */
#define TB	(0x11A7)	/* TBase: added to the T index (index 0 = no T part) */
#define LC	(19)		/* LCount */
#define VC	(21)		/* VCount */
#define TC	(28)		/* TCount */
#define NC	(VC * TC)	/* NCount: syllables per L value (588) */
#define SC	(LC * NC)	/* SCount: total syllables (11172) */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2660) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2661) /* Algorithmic decomposition of hangul syllable. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2662) static utf8leaf_t *utf8hangul(const char *str, unsigned char *hangul)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2663) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2664) 	unsigned int	si;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2665) 	unsigned int	li;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2666) 	unsigned int	vi;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2667) 	unsigned int	ti;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2668) 	unsigned char	*h;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2669) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2670) 	/* Calculate the SI, LI, VI, and TI values. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2671) 	si = utf8decode(str) - SB;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2672) 	li = si / NC;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2673) 	vi = (si % NC) / TC;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2674) 	ti = si % TC;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2675) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2676) 	/* Fill in base of leaf. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2677) 	h = hangul;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2678) 	LEAF_GEN(h) = 2;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2679) 	LEAF_CCC(h) = DECOMPOSE;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2680) 	h += 2;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2681) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2682) 	/* Add LPart, a 3-byte UTF-8 sequence. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2683) 	h += utf8encode((char *)h, li + LB);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2684) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2685) 	/* Add VPart, a 3-byte UTF-8 sequence. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2686) 	h += utf8encode((char *)h, vi + VB);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2687) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2688) 	/* Add TPart if required, also a 3-byte UTF-8 sequence. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2689) 	if (ti)
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2690) 		h += utf8encode((char *)h, ti + TB);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2691) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2692) 	/* Terminate string. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2693) 	h[0] = '\0';
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2694) 
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2695) 	return hangul;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2696) }
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2697) 
/*
 * Use trie to scan s, touching at most len bytes.
 * Returns the leaf if one exists, NULL otherwise.
 *
 * tree selects the trie: the walk starts at utf8data + tree->index.
 * hangul is scratch storage handed to utf8hangul() when the leaf that
 * was found is the hangul cookie; in that case the returned leaf is
 * hangul itself rather than a pointer into utf8data.
 *
 * A NULL tree or a len of 0 yields NULL.
 *
 * A non-NULL return guarantees that the UTF-8 sequence starting at s
 * is well-formed and corresponds to a known unicode code point.  The
 * shorthand for this will be "is valid UTF-8 unicode".
 */
static utf8leaf_t *utf8nlookup(struct tree *tree, unsigned char *hangul,
			       const char *s, size_t len)
{
	utf8trie_t	*trie;
	int		offlen;
	int		offset;
	int		mask;
	int		node;

	if (!tree)
		return NULL;
	if (len == 0)
		return NULL;
	node = 1;
	trie = utf8data + tree->index;
	/* Walk internal nodes; node becomes 0 once trie points at a leaf. */
	while (node) {
		/* Number of offset bytes stored after this node byte. */
		offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
		if (*trie & NEXTBYTE) {
			/* This node tests the next input byte; stop at len. */
			if (--len == 0)
				return NULL;
			s++;
		}
		/* Select the bit of the current input byte this node tests. */
		mask = 1 << (*trie & BITNUM);
		if (*s & mask) {
			/* Right leg */
			if (offlen) {
				/* Right node at offset of trie */
				node = (*trie & RIGHTNODE);
				/*
				 * Assemble the offset from trie[1..offlen];
				 * trie[offlen] is the most significant byte.
				 */
				offset = trie[offlen];
				while (--offlen) {
					offset <<= 8;
					offset |= trie[offlen];
				}
				trie += offset;
			} else if (*trie & RIGHTPATH) {
				/* Right node after this node */
				node = (*trie & TRIENODE);
				trie++;
			} else {
				/* No right node. */
				return NULL;
			}
		} else {
			/* Left leg */
			if (offlen) {
				/* Left node follows the offset bytes. */
				node = (*trie & LEFTNODE);
				trie += offlen + 1;
			} else if (*trie & RIGHTPATH) {
				/* No left node. */
				return NULL;
			} else {
				/* Left node after this node */
				node = (*trie & TRIENODE);
				trie++;
			}
		}
	}
	/*
	 * Hangul decomposition is done algorithmically. These are the
	 * codepoints >= 0xAC00 and <= 0xD7A3. Their UTF-8 encoding is
	 * always 3 bytes long, so s has been advanced twice, and the
	 * start of the sequence is at s-2.
	 */
	if (LEAF_CCC(trie) == DECOMPOSE && LEAF_STR(trie)[0] == HANGUL)
		trie = utf8hangul(s - 2, hangul);
	return trie;
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2774) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2775) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2776)  * Use trie to scan s.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2777)  * Returns the leaf if one exists, NULL otherwise.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2778)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2779)  * Forwards to trie_nlookup().
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2780)  */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2781) static utf8leaf_t *utf8lookup(struct tree *tree, unsigned char *hangul,
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2782) 			      const char *s)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2783) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2784) 	return utf8nlookup(tree, hangul, s, (size_t)-1);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2785) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2786) 
/*
 * Number of bytes in the UTF-8 sequence that starts at s, derived
 * from the lead byte only:
 *
 *   below 0xC0 -> 1,  0xC0..0xDF -> 2,  0xE0..0xEF -> 3,  0xF0 and up -> 4
 *
 * The input is assumed to point at the first byte of a valid UTF-8
 * sequence; no validation is done here.
 */
static inline int utf8clen(const char *s)
{
	const unsigned char lead = (unsigned char)*s;

	if (lead < 0xC0)
		return 1;
	if (lead < 0xE0)
		return 2;
	if (lead < 0xF0)
		return 3;
	return 4;
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2797) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2798) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2799)  * Maximum age of any character in s.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2800)  * Return -1 if s is not valid UTF-8 unicode.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2801)  * Return 0 if only non-assigned code points are used.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2802)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2803) int utf8agemax(struct tree *tree, const char *s)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2804) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2805) 	utf8leaf_t	*leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2806) 	int		age = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2807) 	int		leaf_age;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2808) 	unsigned char	hangul[UTF8HANGULLEAF];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2809) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2810) 	if (!tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2811) 		return -1;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2812) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2813) 	while (*s) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2814) 		leaf = utf8lookup(tree, hangul, s);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2815) 		if (!leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2816) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2817) 		leaf_age = ages[LEAF_GEN(leaf)];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2818) 		if (leaf_age <= tree->maxage && leaf_age > age)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2819) 			age = leaf_age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2820) 		s += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2821) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2822) 	return age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2823) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2824) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2825) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2826)  * Minimum age of any character in s.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2827)  * Return -1 if s is not valid UTF-8 unicode.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2828)  * Return 0 if non-assigned code points are used.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2829)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2830) int utf8agemin(struct tree *tree, const char *s)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2831) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2832) 	utf8leaf_t	*leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2833) 	int		age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2834) 	int		leaf_age;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2835) 	unsigned char	hangul[UTF8HANGULLEAF];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2836) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2837) 	if (!tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2838) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2839) 	age = tree->maxage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2840) 	while (*s) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2841) 		leaf = utf8lookup(tree, hangul, s);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2842) 		if (!leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2843) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2844) 		leaf_age = ages[LEAF_GEN(leaf)];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2845) 		if (leaf_age <= tree->maxage && leaf_age < age)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2846) 			age = leaf_age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2847) 		s += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2848) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2849) 	return age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2850) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2851) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2852) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2853)  * Maximum age of any character in s, touch at most len bytes.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2854)  * Return -1 if s is not valid UTF-8 unicode.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2855)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2856) int utf8nagemax(struct tree *tree, const char *s, size_t len)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2857) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2858) 	utf8leaf_t	*leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2859) 	int		age = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2860) 	int		leaf_age;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2861) 	unsigned char	hangul[UTF8HANGULLEAF];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2862) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2863) 	if (!tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2864) 		return -1;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2865) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2866)         while (len && *s) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2867) 		leaf = utf8nlookup(tree, hangul, s, len);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2868) 		if (!leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2869) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2870) 		leaf_age = ages[LEAF_GEN(leaf)];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2871) 		if (leaf_age <= tree->maxage && leaf_age > age)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2872) 			age = leaf_age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2873) 		len -= utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2874) 		s += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2875) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2876) 	return age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2877) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2878) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2879) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2880)  * Maximum age of any character in s, touch at most len bytes.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2881)  * Return -1 if s is not valid UTF-8 unicode.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2882)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2883) int utf8nagemin(struct tree *tree, const char *s, size_t len)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2884) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2885) 	utf8leaf_t	*leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2886) 	int		leaf_age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2887) 	int		age;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2888) 	unsigned char	hangul[UTF8HANGULLEAF];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2889) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2890) 	if (!tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2891) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2892) 	age = tree->maxage;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2893)         while (len && *s) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2894) 		leaf = utf8nlookup(tree, hangul, s, len);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2895) 		if (!leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2896) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2897) 		leaf_age = ages[LEAF_GEN(leaf)];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2898) 		if (leaf_age <= tree->maxage && leaf_age < age)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2899) 			age = leaf_age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2900) 		len -= utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2901) 		s += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2902) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2903) 	return age;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2904) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2905) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2906) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2907)  * Length of the normalization of s.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2908)  * Return -1 if s is not valid UTF-8 unicode.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2909)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2910)  * A string of Default_Ignorable_Code_Point has length 0.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2911)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2912) ssize_t utf8len(struct tree *tree, const char *s)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2913) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2914) 	utf8leaf_t	*leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2915) 	size_t		ret = 0;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2916) 	unsigned char	hangul[UTF8HANGULLEAF];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2917) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2918) 	if (!tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2919) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2920) 	while (*s) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2921) 		leaf = utf8lookup(tree, hangul, s);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2922) 		if (!leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2923) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2924) 		if (ages[LEAF_GEN(leaf)] > tree->maxage)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2925) 			ret += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2926) 		else if (LEAF_CCC(leaf) == DECOMPOSE)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2927) 			ret += strlen(LEAF_STR(leaf));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2928) 		else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2929) 			ret += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2930) 		s += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2931) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2932) 	return ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2933) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2934) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2935) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2936)  * Length of the normalization of s, touch at most len bytes.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2937)  * Return -1 if s is not valid UTF-8 unicode.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2938)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2939) ssize_t utf8nlen(struct tree *tree, const char *s, size_t len)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2940) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2941) 	utf8leaf_t	*leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2942) 	size_t		ret = 0;
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2943) 	unsigned char	hangul[UTF8HANGULLEAF];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2944) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2945) 	if (!tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2946) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2947) 	while (len && *s) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2948) 		leaf = utf8nlookup(tree, hangul, s, len);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2949) 		if (!leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2950) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2951) 		if (ages[LEAF_GEN(leaf)] > tree->maxage)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2952) 			ret += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2953) 		else if (LEAF_CCC(leaf) == DECOMPOSE)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2954) 			ret += strlen(LEAF_STR(leaf));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2955) 		else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2956) 			ret += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2957) 		len -= utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2958) 		s += utf8clen(s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2959) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2960) 	return ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2961) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2962) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2963) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2964)  * Cursor structure used by the normalizer: set up by utf8ncursor() or
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2965)  * utf8cursor() and then passed to utf8byte() to read normalized bytes. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2966) struct utf8cursor {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2967) 	struct tree	*tree;	/* tree handed to utf8ncursor() */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2968) 	const char	*s;	/* current location; inside a decomposition while one is being scanned */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2969) 	const char	*p;	/* location in the input saved while a decomposition is scanned, else NULL */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2970) 	const char	*ss;	/* s as it was at the start of a repeating scan, else NULL */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2971) 	const char	*sp;	/* p as it was at the start of a repeating scan */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2972) 	unsigned int	len;	/* maximum length of the input; decomposition bytes do not count against it */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2973) 	unsigned int	slen;	/* NOTE(review): presumably len saved together with ss/sp - confirm in utf8byte() */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2974) 	short int	ccc;	/* CCC currently being emitted; -1 during the first pass of a repeating scan */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2975) 	short int	nccc;	/* lowest CCC found so far for the next pass */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2976) 	unsigned int	unichar;	/* set to 0 by utf8ncursor(); NOTE(review): purpose not visible here */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 2977) 	unsigned char	hangul[UTF8HANGULLEAF];	/* NOTE(review): presumably the Hangul scratch buffer given to the lookup functions - confirm */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2978) };
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2979) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2980) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2981)  * Set up an utf8cursor for use by utf8byte().
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2982)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2983)  *   s      : string.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2984)  *   len    : length of s.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2985)  *   u8c    : pointer to cursor.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2986)  *   trie   : utf8trie_t to use for normalization.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2987)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2988)  * Returns -1 on error, 0 on success.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2989)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2990) int utf8ncursor(struct utf8cursor *u8c, struct tree *tree, const char *s,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2991) 		size_t len)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2992) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2993) 	if (!tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2994) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2995) 	if (!s)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2996) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2997) 	u8c->tree = tree;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2998) 	u8c->s = s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 2999) 	u8c->p = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3000) 	u8c->ss = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3001) 	u8c->sp = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3002) 	u8c->len = len;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3003) 	u8c->slen = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3004) 	u8c->ccc = STOPPER;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3005) 	u8c->nccc = STOPPER;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3006) 	u8c->unichar = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3007) 	/* Check we didn't clobber the maximum length. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3008) 	if (u8c->len != len)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3009) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3010) 	/* The first byte of s may not be an utf8 continuation. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3011) 	if (len > 0 && (*s & 0xC0) == 0x80)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3012) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3013) 	return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3014) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3015) 
/*
 * Set up an utf8cursor for use by utf8byte().
 *
 *   u8c    : pointer to cursor.
 *   tree   : tree to use for normalization.
 *   s      : NUL-terminated string.
 *
 * Same as utf8ncursor() with the length set to the largest unsigned
 * int value, which is the widest length the cursor's len field holds.
 *
 * Returns -1 on error, 0 on success.
 */
int utf8cursor(struct utf8cursor *u8c, struct tree *tree, const char *s)
{
	const unsigned int	nolimit = (unsigned int)-1;

	return utf8ncursor(u8c, tree, s, nolimit);
}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3029) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3030) /*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3031)  * Get one byte from the normalized form of the string described by u8c.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3032)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3033)  * Returns the byte cast to an unsigned char on success, and -1 on failure.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3034)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3035)  * The cursor keeps track of the location in the string in u8c->s.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3036)  * When a character is decomposed, the current location is stored in
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3037)  * u8c->p, and u8c->s is set to the start of the decomposition. Note
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3038)  * that bytes from a decomposition do not count against u8c->len.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3039)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3040)  * Characters are emitted if they match the current CCC in u8c->ccc.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3041)  * Hitting end-of-string while u8c->ccc == STOPPER means we're done,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3042)  * and the function returns 0 in that case.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3043)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3044)  * Sorting by CCC is done by repeatedly scanning the string.  The
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3045)  * values of u8c->s and u8c->p are stored in u8c->ss and u8c->sp at
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3046)  * the start of the scan.  The first pass finds the lowest CCC to be
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3047)  * emitted and stores it in u8c->nccc, the second pass emits the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3048)  * characters with this CCC and finds the next lowest CCC. This limits
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3049)  * the number of passes to 1 + the number of different CCCs in the
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3050)  * sequence being scanned.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3051)  *
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3052)  * Therefore:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3053)  *  u8c->p  != NULL -> a decomposition is being scanned.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3054)  *  u8c->ss != NULL -> this is a repeating scan.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3055)  *  u8c->ccc == -1  -> this is the first scan of a repeating scan.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3056)  */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3057) int utf8byte(struct utf8cursor *u8c)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3058) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3059) 	utf8leaf_t *leaf;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3060) 	int ccc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3061) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3062) 	for (;;) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3063) 		/* Check for the end of a decomposed character. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3064) 		if (u8c->p && *u8c->s == '\0') {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3065) 			u8c->s = u8c->p;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3066) 			u8c->p = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3067) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3068) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3069) 		/* Check for end-of-string. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3070) 		if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3071) 			/* There is no next byte. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3072) 			if (u8c->ccc == STOPPER)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3073) 				return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3074) 			/* End-of-string during a scan counts as a stopper. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3075) 			ccc = STOPPER;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3076) 			goto ccc_mismatch;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3077) 		} else if ((*u8c->s & 0xC0) == 0x80) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3078) 			/* This is a continuation of the current character. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3079) 			if (!u8c->p)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3080) 				u8c->len--;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3081) 			return (unsigned char)*u8c->s++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3082) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3083) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3084) 		/* Look up the data for the current character. */
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 3085) 		if (u8c->p) {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 3086) 			leaf = utf8lookup(u8c->tree, u8c->hangul, u8c->s);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 3087) 		} else {
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 3088) 			leaf = utf8nlookup(u8c->tree, u8c->hangul,
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 3089) 					   u8c->s, u8c->len);
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 3090) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3091) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3092) 		/* No leaf found implies that the input is a binary blob. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3093) 		if (!leaf)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3094) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3095) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3096) 		/* Characters that are too new have CCC 0. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3097) 		if (ages[LEAF_GEN(leaf)] > u8c->tree->maxage) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3098) 			ccc = STOPPER;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3099) 		} else if ((ccc = LEAF_CCC(leaf)) == DECOMPOSE) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3100) 			u8c->len -= utf8clen(u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3101) 			u8c->p = u8c->s + utf8clen(u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3102) 			u8c->s = LEAF_STR(leaf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3103) 			/* Empty decomposition implies CCC 0. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3104) 			if (*u8c->s == '\0') {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3105) 				if (u8c->ccc == STOPPER)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3106) 					continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3107) 				ccc = STOPPER;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3108) 				goto ccc_mismatch;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3109) 			}
a8384c68797ee scripts/mkutf8data.c (Olaf Weber              2019-04-25 13:49:18 -0400 3110) 			leaf = utf8lookup(u8c->tree, u8c->hangul, u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3111) 			ccc = LEAF_CCC(leaf);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3112) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3113) 		u8c->unichar = utf8decode(u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3114) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3115) 		/*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3116) 		 * If this is not a stopper, then see if it updates
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3117) 		 * the next canonical class to be emitted.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3118) 		 */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3119) 		if (ccc != STOPPER && u8c->ccc < ccc && ccc < u8c->nccc)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3120) 			u8c->nccc = ccc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3121) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3122) 		/*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3123) 		 * Return the current byte if this is the current
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3124) 		 * combining class.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3125) 		 */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3126) 		if (ccc == u8c->ccc) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3127) 			if (!u8c->p)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3128) 				u8c->len--;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3129) 			return (unsigned char)*u8c->s++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3130) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3131) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3132) 		/* Current combining class mismatch. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3133) 	ccc_mismatch:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3134) 		if (u8c->nccc == STOPPER) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3135) 			/*
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3136) 			 * Scan forward for the first canonical class
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3137) 			 * to be emitted.  Save the position from
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3138) 			 * which to restart.
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3139) 			 */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3140) 			assert(u8c->ccc == STOPPER);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3141) 			u8c->ccc = MINCCC - 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3142) 			u8c->nccc = ccc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3143) 			u8c->sp = u8c->p;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3144) 			u8c->ss = u8c->s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3145) 			u8c->slen = u8c->len;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3146) 			if (!u8c->p)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3147) 				u8c->len -= utf8clen(u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3148) 			u8c->s += utf8clen(u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3149) 		} else if (ccc != STOPPER) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3150) 			/* Not a stopper, and not the ccc we're emitting. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3151) 			if (!u8c->p)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3152) 				u8c->len -= utf8clen(u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3153) 			u8c->s += utf8clen(u8c->s);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3154) 		} else if (u8c->nccc != MAXCCC + 1) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3155) 			/* At a stopper, restart for next ccc. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3156) 			u8c->ccc = u8c->nccc;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3157) 			u8c->nccc = MAXCCC + 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3158) 			u8c->s = u8c->ss;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3159) 			u8c->p = u8c->sp;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3160) 			u8c->len = u8c->slen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3161) 		} else {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3162) 			/* All done, proceed from here. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3163) 			u8c->ccc = STOPPER;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3164) 			u8c->nccc = STOPPER;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3165) 			u8c->sp = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3166) 			u8c->ss = NULL;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3167) 			u8c->slen = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3168) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3169) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3170) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3171) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3172) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3173) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3174) static int normalize_line(struct tree *tree)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3175) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3176) 	char *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3177) 	char *t;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3178) 	int c;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3179) 	struct utf8cursor u8c;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3180) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3181) 	/* First test: null-terminated string. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3182) 	s = buf2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3183) 	t = buf3;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3184) 	if (utf8cursor(&u8c, tree, s))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3185) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3186) 	while ((c = utf8byte(&u8c)) > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3187) 		if (c != (unsigned char)*t++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3188) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3189) 	if (c < 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3190) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3191) 	if (*t != 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3192) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3193) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3194) 	/* Second test: length-limited string. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3195) 	s = buf2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3196) 	/* Replace NUL with a value that will cause an error if seen. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3197) 	s[strlen(s) + 1] = -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3198) 	t = buf3;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3199) 	if (utf8cursor(&u8c, tree, s))
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3200) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3201) 	while ((c = utf8byte(&u8c)) > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3202) 		if (c != (unsigned char)*t++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3203) 			return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3204) 	if (c < 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3205) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3206) 	if (*t != 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3207) 		return -1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3208) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3209) 	return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3210) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3211) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3212) static void normalization_test(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3213) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3214) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3215) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3216) 	struct unicode_data *data;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3217) 	char *s;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3218) 	char *t;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3219) 	int ret;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3220) 	int ignorables;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3221) 	int tests = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3222) 	int failures = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3223) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3224) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3225) 		printf("Parsing %s\n", test_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3226) 	/* Step one, read data from file. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3227) 	file = fopen(test_name, "r");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3228) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3229) 		open_fail(test_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3230) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3231) 	while (fgets(line, LINESIZE, file)) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3232) 		ret = sscanf(line, "%[^;];%*[^;];%[^;];%*[^;];%*[^;];",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3233) 			     buf0, buf1);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3234) 		if (ret != 2 || *line == '#')
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3235) 			continue;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3236) 		s = buf0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3237) 		t = buf2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3238) 		while (*s) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3239) 			unichar = strtoul(s, &s, 16);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3240) 			t += utf8encode(t, unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3241) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3242) 		*t = '\0';
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3243) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3244) 		ignorables = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3245) 		s = buf1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3246) 		t = buf3;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3247) 		while (*s) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3248) 			unichar = strtoul(s, &s, 16);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3249) 			data = &unicode_data[unichar];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3250) 			if (data->utf8nfdi && !*data->utf8nfdi)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3251) 				ignorables = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3252) 			else
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3253) 				t += utf8encode(t, unichar);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3254) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3255) 		*t = '\0';
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3256) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3257) 		tests++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3258) 		if (normalize_line(nfdi_tree) < 0) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3259) 			printf("Line %s -> %s", buf0, buf1);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3260) 			if (ignorables)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3261) 				printf(" (ignorables removed)");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3262) 			printf(" failure\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3263) 			failures++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3264) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3265) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3266) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3267) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3268) 		printf("Ran %d tests with %d failures\n", tests, failures);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3269) 	if (failures)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3270) 		file_fail(test_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3271) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3272) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3273) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3274) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3275) static void write_file(void)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3276) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3277) 	FILE *file;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3278) 	int i;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3279) 	int j;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3280) 	int t;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3281) 	int gen;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3282) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3283) 	if (verbose > 0)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3284) 		printf("Writing %s\n", utf8_name);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3285) 	file = fopen(utf8_name, "w");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3286) 	if (!file)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3287) 		open_fail(utf8_name, errno);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3288) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3289) 	fprintf(file, "/* This file is generated code, do not edit. */\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3290) 	fprintf(file, "#ifndef __INCLUDED_FROM_UTF8NORM_C__\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3291) 	fprintf(file, "#error Only nls_utf8-norm.c should include this file.\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3292) 	fprintf(file, "#endif\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3293) 	fprintf(file, "\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3294) 	fprintf(file, "static const unsigned int utf8vers = %#x;\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3295) 		unicode_maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3296) 	fprintf(file, "\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3297) 	fprintf(file, "static const unsigned int utf8agetab[] = {\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3298) 	for (i = 0; i != ages_count; i++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3299) 		fprintf(file, "\t%#x%s\n", ages[i],
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3300) 			ages[i] == unicode_maxage ? "" : ",");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3301) 	fprintf(file, "};\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3302) 	fprintf(file, "\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3303) 	fprintf(file, "static const struct utf8data utf8nfdicfdata[] = {\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3304) 	t = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3305) 	for (gen = 0; gen < ages_count; gen++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3306) 		fprintf(file, "\t{ %#x, %d }%s\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3307) 			ages[gen], trees[t].index,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3308) 			ages[gen] == unicode_maxage ? "" : ",");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3309) 		if (trees[t].maxage == ages[gen])
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3310) 			t += 2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3311) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3312) 	fprintf(file, "};\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3313) 	fprintf(file, "\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3314) 	fprintf(file, "static const struct utf8data utf8nfdidata[] = {\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3315) 	t = 1;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3316) 	for (gen = 0; gen < ages_count; gen++) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3317) 		fprintf(file, "\t{ %#x, %d }%s\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3318) 			ages[gen], trees[t].index,
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3319) 			ages[gen] == unicode_maxage ? "" : ",");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3320) 		if (trees[t].maxage == ages[gen])
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3321) 			t += 2;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3322) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3323) 	fprintf(file, "};\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3324) 	fprintf(file, "\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3325) 	fprintf(file, "static const unsigned char utf8data[%zd] = {\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3326) 		utf8data_size);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3327) 	t = 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3328) 	for (i = 0; i != utf8data_size; i += 16) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3329) 		if (i == trees[t].index) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3330) 			fprintf(file, "\t/* %s_%x */\n",
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3331) 				trees[t].type, trees[t].maxage);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3332) 			if (t < trees_count-1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3333) 				t++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3334) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3335) 		fprintf(file, "\t");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3336) 		for (j = i; j != i + 16; j++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3337) 			fprintf(file, "0x%.2x%s", utf8data[j],
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3338) 				(j < utf8data_size -1 ? "," : ""));
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3339) 		fprintf(file, "\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3340) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3341) 	fprintf(file, "};\n");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3342) 	fclose(file);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3343) }
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3344) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3345) /* ------------------------------------------------------------------ */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3346) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3347) int main(int argc, char *argv[])
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3348) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3349) 	unsigned int unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3350) 	int opt;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3351) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3352) 	argv0 = argv[0];
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3353) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3354) 	while ((opt = getopt(argc, argv, "a:c:d:f:hn:o:p:t:v")) != -1) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3355) 		switch (opt) {
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3356) 		case 'a':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3357) 			age_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3358) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3359) 		case 'c':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3360) 			ccc_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3361) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3362) 		case 'd':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3363) 			data_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3364) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3365) 		case 'f':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3366) 			fold_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3367) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3368) 		case 'n':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3369) 			norm_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3370) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3371) 		case 'o':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3372) 			utf8_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3373) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3374) 		case 'p':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3375) 			prop_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3376) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3377) 		case 't':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3378) 			test_name = optarg;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3379) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3380) 		case 'v':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3381) 			verbose++;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3382) 			break;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3383) 		case 'h':
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3384) 			help();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3385) 			exit(0);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3386) 		default:
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3387) 			usage();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3388) 		}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3389) 	}
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3390) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3391) 	if (verbose > 1)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3392) 		help();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3393) 	for (unichar = 0; unichar != 0x110000; unichar++)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3394) 		unicode_data[unichar].code = unichar;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3395) 	age_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3396) 	ccc_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3397) 	nfdi_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3398) 	nfdicf_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3399) 	ignore_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3400) 	corrections_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3401) 	hangul_decompose();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3402) 	nfdi_decompose();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3403) 	nfdicf_decompose();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3404) 	utf8_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3405) 	trees_init();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3406) 	trees_populate();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3407) 	trees_reduce();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3408) 	trees_verify();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3409) 	/* Prevent "unused function" warning. */
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3410) 	(void)lookup(nfdi_tree, " ");
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3411) 	if (verbose > 2)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3412) 		tree_walk(nfdi_tree);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3413) 	if (verbose > 2)
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3414) 		tree_walk(nfdicf_tree);
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3415) 	normalization_test();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3416) 	write_file();
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3417) 
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3418) 	return 0;
955405d1174ee scripts/mkutf8data.c (Gabriel Krisman Bertazi 2019-04-25 13:38:44 -0400 3419) }