cScm Configuration Daemon

cScm is a tool that converts SCM configuration files into a binary format and stores them in shared memory, where they are read by the cSvn-ui and cGit-ui CGI scripts.

2 Commits   0 Branches   1 Tag
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   1) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   2) #ifdef HAVE_CONFIG_H
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   3) #include <config.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   4) #endif
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   5) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   6) #include <stdlib.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   7) #include <stdio.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   8) #include <unistd.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300   9) #include <string.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  10) #include <stdarg.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  11) #include <limits.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  12) #include <locale.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  13) #include <wchar.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  14) #include <wctype.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  15) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  16) #include <defs.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  17) #include <utf8ing.h>
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  18) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  19) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  20) static const ucs4_t replacement_char     = 0xfffd;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  21) static const ucs4_t maximum_ucs4         = 0x7fffffff;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  22) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  23) static const int    half_shift           = 10;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  24) static const ucs4_t half_base            = 0x0010000;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  25) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  26) static const ucs4_t surrogate_high_start = 0xd800;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  27) static const ucs4_t surrogate_high_end   = 0xdbff;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  28) static const ucs4_t surrogate_low_start  = 0xdc00;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  29) static const ucs4_t surrogate_low_end    = 0xdfff;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  30) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  31) static utf8_t
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  32) first_byte_mark[7] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  33) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  34) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  35) /***************************************************************
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  36)   static copy_ucs4_to_utf8()
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  37) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  38)      Переводит строку символов UCS4( src ) в UTF8( dest ).
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  39) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  40)      Возвращаемое значение:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  41)         Количество байт, реально записанное в DEST.
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  42) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  43)      NOTE:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  44)         Выход за пределы памяти, выделенной под указатель DEST
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  45)         не контролируются.
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  46)         Подразумевается, что строка SRC имеет null-терминатор.
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  47)  ***************************************************************/
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  48) int copy_ucs4_to_utf8( utf8_t *dest, const ucs4_t *src )
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  49) {
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  50)   utf8_t   target[7];
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  51)   utf8_t  *ptr;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  52)   int      count = 0;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  53) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  54)   while( *src )
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  55)   {
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  56)     ucs4_t        c;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  57)     int           bytes_to_write = 0;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  58)     const ucs4_t  byte_mask = 0xbf;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  59)     const ucs4_t  byte_mark = 0x80;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  60) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  61)     c = *src++;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  62) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  63)     if( c >= surrogate_high_start &&
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  64)         c <= surrogate_high_end   && *src )
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  65)     {
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  66)       ucs4_t c2 = *src;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  67) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  68)       if( c2 >= surrogate_low_start &&
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  69)           c2 <= surrogate_low_end      )
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  70)       {
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  71)         c = ((c  - surrogate_high_start) << half_shift) +
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  72)              (c2 - surrogate_low_start) + half_base;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  73)         ++src;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  74)       }
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  75)     }
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  76) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  77)          if( c <          0x80 ) bytes_to_write = 1;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  78)     else if( c <         0x800 ) bytes_to_write = 2;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  79)     else if( c <       0x10000 ) bytes_to_write = 3;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  80)     else if( c <      0x200000 ) bytes_to_write = 4;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  81)     else if( c <     0x4000000 ) bytes_to_write = 5;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  82)     else if( c <= maximum_ucs4 ) bytes_to_write = 6;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  83)     else
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  84)     {
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  85)       bytes_to_write = 2;   c = replacement_char;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  86)     }
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  87) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  88)     ptr = &target[0] + bytes_to_write;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  89) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  90)     switch( bytes_to_write )
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  91)     {
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  92)       case 6:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  93)         *--ptr = (c | byte_mark) & byte_mask; c >>= 6;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  94)       case 5:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  95)         *--ptr = (c | byte_mark) & byte_mask; c >>= 6;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  96)       case 4:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  97)         *--ptr = (c | byte_mark) & byte_mask; c >>= 6;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  98)       case 3:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300  99)         *--ptr = (c | byte_mark) & byte_mask; c >>= 6;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 100)       case 2:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 101)         *--ptr = (c | byte_mark) & byte_mask; c >>= 6;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 102)       case 1:
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 103)         *--ptr = c | first_byte_mark[bytes_to_write];
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 104)     }
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 105) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 106)     ptr = &target[0];
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 107) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 108)     while( bytes_to_write > 0 )
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 109)     {
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 110)       *dest++ = *ptr++; /* write byte */
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 111)       --bytes_to_write;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 112)       ++count;
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 113)     }
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 114) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 115)   } /* End while( *src ) */
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 116) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 117)   *dest = (utf8_t)0; /* null terminator */
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 118) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 119)   return( count );
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 120) 
12c7b1c5 (kx 2023-03-24 02:53:04 +0300 121) } /* End of static copy_ucs4_to_utf8() */