.CM  SCRIPT , Version - 1.1 , last edited by holger
.ad 8
.bm 8
.fm 4
.bt $Copyright (c) 2000-2005 SAP AG$$Page %$
.tm 12
.hm 6
.hs 3
.tt 1 $SQL$Project Distributed Database System$VSP80$
.tt 2 $$$
.tt 3 $$RTE-Extension-80$2000-10-05$
***********************************************************
.nf
 
 .nf

.nf

    ========== licence begin  GPL
    Copyright (c) 2000-2005 SAP AG

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
    ========== licence end
.fo


.fo

 
.fo
.nf
.sp
Module  : RTE-Extension-80
=========
.sp
Purpose : National Language Support (NLS)
          Translates between any country-specific codesets
          (including multibyte-codesets and ISO8859) and
          UNICODE.
 
.CM *-END-* purpose -------------------------------------
.sp
.cp 3
Define  :
 
        PROCEDURE
              s80uni_trans
                    (src_ptr        : tsp00_MoveObjPtr;
                    src_len         : tsp00_Int4;
                    src_codeset     : tsp00_Int2;
                    dest_ptr        : tsp00_MoveObjPtr;
                    VAR dest_len    : tsp00_Int4;
                    dest_codeset    : tsp00_Int2;
                    trans_options   : tsp8_uni_opt_set;
                    VAR rc          : tsp8_uni_error;
                    VAR err_char_no : tsp00_Int4);
 
        FUNCTION
              s80uni_get_srcpos (
                    dest_pos        : tsp00_Int4;
                    src_ptr         : tsp00_MoveObjPtr;
                    src_len         : tsp00_Int4;
                    src_codeset     : tsp00_Int2;
                    VAR rc          : tsp8_uni_error;
                    VAR err_char_no : tsp00_Int4) : tsp00_Int4;
 
        FUNCTION
              s80uni_chartype(
                    str_ptr         : tsp00_MoveObjPtr;
                    VAR str_len     : tsp00_Int4;
                    str_codeset     : tsp00_Int2): tsp8_uni_error;
 
        PROCEDURE
              s80uni_charlayout(
                    src_ptr         : tsp00_MoveObjPtr;
                    src_len         : tsp00_Int4;
                    src_codeset     : tsp00_Int2;
                    dest_ptr        : tsp00_MoveObjPtr;
                    VAR rc          : tsp8_uni_error;
                    VAR err_char_no : tsp00_Int4);
 
        PROCEDURE
              s80uni_error (
                    rc      : tsp8_uni_error;
                    VAR msg : tsp00_C40);
 
        PROCEDURE
              s80ascii_check (
                    src_ptr         : tsp00_MoveObjPtr;
                    src_len         : tsp00_Int4;
                    src_codeset     : tsp00_Int2;
                    dest_ptr        : tsp00_MoveObjPtr;
                    VAR dest_len    : tsp00_Int4;
                    VAR rc          : tsp8_uni_error;
                    VAR err_char_no : tsp00_Int4);
 
.CM *-END-* define --------------------------------------
.sp;.cp 3
Use     :
 
        FROM
              Kernel_move_and_fill : VGG101;
 
        PROCEDURE
              SAPDB_PascalForcedFill (
                    size        : tsp00_Int4;
                    m           : tsp00_MoveObjPtr;
                    pos         : tsp00_Int4;
                    len         : tsp00_Int4;
                    fillchar    : char);
 
        PROCEDURE
              SAPDB_PascalForcedOverlappingMove (
                    source_upb  : tsp00_Int4;
                    destin_upb  : tsp00_Int4;
                    source      : tsp00_MoveObjPtr;
                    source_pos  : tsp00_Int4;
                    destin      : tsp00_MoveObjPtr;
                    destin_pos  : tsp00_Int4;
                    length      : tsp00_Int4);
 
        PROCEDURE
              s10mv (
                    source_upb  : tsp00_Int4;       
                    destin_upb  : tsp00_Int4;
                    source      : tsp00_MoveObjPtr;    
                    source_pos  : tsp00_Int4;
                    destin      : tsp00_MoveObjPtr;    
                    destin_pos  : tsp00_Int4;
                    length      : tsp00_Int4);
 
      ------------------------------ 
 
        FROM
              RTE-Extension-30 : VSP30;
 
        FUNCTION
              s30lnr (VAR str : tsp00_MoveObj;
                    skip_val  : char;
                    start_pos : tsp00_Int4;
                    length    : tsp00_Int4) : tsp00_Int4;
 
        FUNCTION
              s30unilnr (str  : tsp00_MoveObjPtr;
                    skip_val  : tsp00_C2;
                    start_pos : tsp00_Int4;
                    length    : tsp00_Int4) : tsp00_Int4;
 
      ------------------------------ 
 
        FROM
              RTE-Extension-81 : VSP81;
 
        PROCEDURE
              sp81UCS2QuotesContainingStringToupper (
                    buffer_ptr         : tsp00_MoveObjPtr;
                    buffer_len         : tsp00_Int4);
 
        PROCEDURE
              sp81UCS2QuotesContainingSwappedStringToupper (
                    buffer_ptr         : tsp00_MoveObjPtr;
                    buffer_len         : tsp00_Int4);
 
.CM *-END-* use -----------------------------------------
.sp;.cp 3
Synonym :
 
.CM *-END-* synonym -------------------------------------
.sp;.cp 3
Author  :
.sp
.cp 3
Created : 1995-08-14
.sp
.cp 3
Version : 2002-08-26
.sp
.cp 3
Release :      Date : 2000-10-05
.sp
***********************************************************
.sp
.cp 10
.fo
.oc _/1
Specification:
 
.CM *-END-* specification -------------------------------
.sp 2
***********************************************************
.sp
.cp 10
.fo
.oc _/1
.nf
Description:
 
1) s80uni_trans
===============
 
a) Purpose
  Translate a character-string between any national characterset and UNICODE.
 
 
b) Parameter
  src_ptr:       points to input string that is to be translated
 
  src_len:       length of input-string (bytes) to be translated
 
  src_codeset:   key generated by "s80uni_fload" that specifies the
                 character-set to be translated.
 
  dest_ptr:      use to indicate where you want "s80uni_load" to store
                 the output of the translation (caller must supply
                 storage).
 
  dest_len:      use to indicate the length of the field supplied
                 in "DEST_PTR".
 
  dest_codeset:  key generated by "s80uni_fload" that specifies the
                 UNICODE-characterset.
 
  trans_options: options that influence "s80uni_load" processing:
 
                 "uni_upper": map all characters that are NOT enclosed
                              in single-quotes to uppercase when trans-
                              lating TO UNICODE.
 
                 "uni_fill":  if "DEST_LEN" is longer than the result of the
                              translation then fillup the output-field at
                              "DEST_PTR" using the default-fill-character
                              of this character-set (ISO8859 = x'20',
                              UNICODE = U0020).
 
  rc:            return-code  (see vsp008 for details)
 
                 "uni_not_translatable": Input-string contains unknown
                                         characters. Translation stopped.
 
                 "uni_dest_too_short":   self-explaining
 
  err_char_no:   gives the number of characters translated before an
                 mapping-error (RC = uni_not_translatable) occured.
                 This option is intended to help searching translate-
                 errors in multibyte-strings (MBCS).
 
 
c) Usage suggestions
  Before "s80uni_trans" can be can be used, the data-structures
  and mapping tables to translate between any characterset and UNICODE
  must be loaded into memory.
 
  On client-side this can be done by calling "sp82uni_fload", specifying
  which codeset to load. "sp82uni_fload" must be called once for each
  characterset that is to be loaded and reads from a file in $DBROOT/env
 
  The calling sequence is as following:
 
   1. Load the codeset specified by "iso_locale_str" (eg. "JA_JPN.SJIS"
      or "ISO8859_1") into memory:
 
      sp82uni_fload (encodings_ptr, encoding_cnt, iso_locale_str,
         codeset, rc)
 
    "sp82uni_fload" returns one integer: "codeset" that must be passed to
    "sp80uni_load". "codeset" is a key that identifies the codeset
    "iso_locale_str".
 
   2. Do the actual translation between any-codeset and UNICODE or vice versa:
 
     sp80uni_load   (src_ptr, src_len,
         src_codeset, dest_ptr, dest_len, dest_codeset, trans_options, rc
         err_char_no)
 
     The order of the parameters "src_codeset" and "dest_codeset" tells
     "s80uni_trans" about the desired direction of the translation:
 
     sp80uniload (...codeset, csp_unicode...) = any -> UNICODE
     sp80uniload (...csp_unicode, codeset...) = UNICODE -> any
 
 
d) change history
  10JAN96  buchta   add multiple value_len-support
  20FEB96  buchta   rewrite to use multiple unicode-variants
 
 
2. s80uni_get_srcpos
====================
 
a) Purpose
  Return byte-position in source-string that corresponds to a given
  byte_position in dest-string (dest_pos).
 
 
3. s80uni_chartype
==================
 
a) Purpose
  Determine if a given source-string contains a singlebyte- or a
  multibyte-character. If source-string contains unknown characters
  return 'not translatable'.
 
b) Supply Parameter
  - Source-string is specified by 'src_ptr', 'src_len' and 'src_
    codeset'
 
c) Return Parameter
  Function returns:
    'uni_not_translatable' if unknown character is found
    'uni_is_singlebyte' if singlebyte character is found and 'src_len' is
                        set to 1
    'uni_is_multibyte' if multibyte character is found and 'src_len' is
                        set to the length of the multibyte character
    'uni_is_incomplete' if the first 'n-1' characters of a multibyte-character
                        are found. At character 'n' (last character)
                        'uni_is_multibyte' is returned if the multibyte-
                        string has been recognized sucessfully.
 
 
3. s80uni_charlayout
====================
 
a) Purpose
  Scan a source-string and mark single byte-characters with x'00', the
  first byte of multibyte-characters with x'00' and all following bytes
  of multibyte-characters with x'01'.
 
b) Supply-Parameter
  Source-string is specified by 'src_ptr', 'src_len' and 'src_codeset'
 
c) Return-Parameter
  Target-string consisting of x'00' and x'01' is written to 'dest_ptr'.
  (Source- and target-string lengths are equal)
 
  Return codes im parameter 'rc':
      'uni_not_translatable'   = Unknown character in source-string.
                                 Action: scanning is stopped. Number of
                                 scanned character is returned in
                                 'err_char_no'
      'no_such_encoding'       = check parameter 'src_codeset'. Should
                                 be set to the return-value of 'sp82uni_
                                 fload'
 
4) References
=============
 
vsp008         data structures for translation
vak53          load mapping-tables from DB into kernel-memory
vsp82          load mapping-tables from file into client-memory
vni70c         UNICODE-compiler, generates mapping-tables
 
 
change history
==============
 
10JAN96  buchta   add multiple value_len-support
20FEB96  buchta   rewrite to use multiple unicode-variants
19AUG96  holger   change err_char_no to use byte-pos
 
.CM *-END-* description ---------------------------------
.sp 2
***********************************************************
.sp
.cp 10
.nf
.oc _/1
Structure:
 
.CM *-END-* structure -----------------------------------
.sp 2
**********************************************************
.sp
.cp 10
.nf
.oc _/1
.CM -lll-
Code    :
 
 
CONST
      c_not_trans_mark = '?';
 
 
(*------------------------------*) 
 
PROCEDURE
      s80uni_trans (
            src_ptr         : tsp00_MoveObjPtr;
            src_len         : tsp00_Int4;
            src_codeset     : tsp00_Int2;
            dest_ptr        : tsp00_MoveObjPtr;
            VAR dest_len    : tsp00_Int4;
            dest_codeset    : tsp00_Int2;
            trans_options   : tsp8_uni_opt_set;
            VAR rc          : tsp8_uni_error;
            VAR err_char_no : tsp00_Int4);
 
CONST
      c2_uniswap_blank = '\20\00';
 
VAR
      wanted_len          : tsp00_Int4;
      trans_len           : tsp00_Int4;
      set_warning         : boolean;
 
BEGIN
set_warning := false;
err_char_no := 1;
IF  ((src_codeset in [ csp_unicode, csp_unicode_swap ]) AND
    (src_len MOD 2 <> 0))
THEN
    BEGIN
    rc := uni_is_incomplete;
    END
ELSE
    BEGIN
    rc := uni_ok;
    wanted_len := dest_len;
    (* h.b. 5.11.1996
          *  siehe PTS Fehlermeldung
          *  fuer den Precompiler werden abschliessende Blanks entfernt *)
    IF  (rc = uni_ok) AND (uni_del_trailing_blanks in trans_options)
    THEN
        BEGIN
        IF  src_codeset = csp_unicode
        THEN
            src_len := s30unilnr (src_ptr, csp_unicode_blank,
                  1, src_len)
        ELSE
            IF  src_codeset = csp_unicode_swap
            THEN
                src_len := s30unilnr (src_ptr, c2_uniswap_blank,
                      1, src_len)
            ELSE
                src_len := s30lnr (src_ptr^, bsp_c1, 1, src_len);
            (*ENDIF*) 
        (*ENDIF*) 
        END;
    (*ENDIF*) 
    IF  src_len > 0
    THEN
        BEGIN
        IF  src_codeset = dest_codeset
        THEN
            IF  dest_len >= src_len
            THEN
                BEGIN
                SAPDB_PascalForcedOverlappingMove (src_len, src_len, @src_ptr^, 1, @dest_ptr^, 1, src_len);
                dest_len := src_len
                END
            ELSE
                (* PTS 1117437 E.Z. *)
                BEGIN
                SAPDB_PascalForcedOverlappingMove (src_len, src_len, @src_ptr^, 1, @dest_ptr^, 1, dest_len);
                err_char_no := dest_len + 1;
                rc := uni_dest_too_short
                END
            (*ENDIF*) 
        ELSE
            IF  ( src_codeset in [ csp_ascii, csp_unicode, csp_unicode_swap ]) AND
                (dest_codeset in [ csp_ascii, csp_unicode, csp_unicode_swap ])
            THEN
                sp80_ascii_trans (src_ptr, src_len, src_codeset,
                      dest_ptr, dest_len, dest_codeset, rc, err_char_no)
            ELSE
                rc := uni_no_such_encoding;
            (*ENDIF*) 
        (*ENDIF*) 
        (* upper case translation (not in single or double quotes) *)
        IF  ( uni_change_to_upper in trans_options ) AND
            (dest_codeset = csp_unicode)
        THEN
            BEGIN
            sp81UCS2QuotesContainingStringToupper(dest_ptr, dest_len);
            END;
        (*ENDIF*) 
        IF  ( uni_change_to_upper in trans_options ) AND
            (dest_codeset = csp_unicode_swap)
        THEN
            BEGIN
            sp81UCS2QuotesContainingSwappedStringToupper(dest_ptr, dest_len);
            END;
        (*ENDIF*) 
        END
    ELSE
        dest_len := 0;
    (*ENDIF*) 
    IF  (rc = uni_ok) AND
        (uni_fillup_field in trans_options) AND
        (dest_len < wanted_len)
    THEN
        BEGIN
        trans_len := dest_len;
        IF  dest_codeset = csp_ascii
        THEN
            SAPDB_PascalForcedFill (wanted_len, @dest_ptr^, dest_len+1,
                  wanted_len - dest_len, csp_ascii_blank)
        ELSE
            IF  dest_codeset = csp_unicode
            THEN
                WHILE dest_len + 2 <= wanted_len DO
                    BEGIN
                    dest_ptr^[ dest_len + 1 ] := csp_unicode_mark;
                    dest_ptr^[ dest_len + 2 ] := csp_ascii_blank;
                    dest_len := dest_len + 2
                    END
                (*ENDWHILE*) 
            ELSE
                IF  dest_codeset = csp_unicode_swap
                THEN
                    WHILE dest_len + 2 <= wanted_len DO
                        BEGIN
                        dest_ptr^[ dest_len + 1 ] := csp_ascii_blank;
                        dest_ptr^[ dest_len + 2 ] := csp_unicode_mark;
                        dest_len := dest_len + 2
                        END;
                    (*ENDWHILE*) 
                (*ENDIF*) 
            (*ENDIF*) 
        (*ENDIF*) 
        dest_len := trans_len;
        END;
    (*ENDIF*) 
    END;
(*ENDIF*) 
IF  set_warning
THEN
    rc := uni_translate_warning;
(*ENDIF*) 
END;
 
(*------------------------------*) 
 
PROCEDURE
      sp80_ascii_trans (src_ptr         : tsp00_MoveObjPtr;
            src_len         : tsp00_Int4;
            src_codeset     : tsp00_Int2;
            dest_ptr        : tsp00_MoveObjPtr;
            VAR dest_len    : tsp00_Int4;
            dest_codeset    : tsp00_Int2;
            VAR rc          : tsp8_uni_error;
            VAR err_char_no : tsp00_Int4);
 
VAR
      c                   : char;
      i                   : tsp00_Int4;
      j                   : tsp00_Int4;
      low_pos             : integer;
      high_pos            : integer;
 
BEGIN
rc := uni_ok;
err_char_no := 1;
IF  ((src_codeset = csp_unicode) AND
    (dest_codeset = csp_unicode_swap))
    OR
    ((src_codeset = csp_unicode_swap) AND
    (dest_codeset = csp_unicode))
THEN
    BEGIN
    j := 0;
    WHILE ((j+2) <= src_len) AND ((j+2) <= dest_len) DO
        BEGIN
        (* looks funny, but if src_ptr and dest_ptr are the same, *)
        (* this is necessary                                      *)
        c                := src_ptr^[ j+1 ];
        dest_ptr^[ j+1 ] := src_ptr^[ j+2 ];
        dest_ptr^[ j+2 ] := c;
        j                := j + 2;
        err_char_no      := j;
        END;
    (*ENDWHILE*) 
    IF  (src_len > j)
    THEN
        BEGIN
        rc := uni_dest_too_short;
        err_char_no := j + 1;
        END;
    (*ENDIF*) 
    dest_len := j;
    END
ELSE
    IF  (src_codeset = csp_ascii)
    THEN
        BEGIN
        IF  (dest_codeset = csp_unicode)
        THEN
            BEGIN
            low_pos   := 1;
            high_pos := 2;
            END
        ELSE
            BEGIN
            low_pos   := 2;
            high_pos := 1;
            END;
        (*ENDIF*) 
        i := 1;
        j := 0;
        WHILE (i <= src_len) AND (j + 2 <= dest_len)
              AND (rc = uni_ok) DO
            BEGIN
            dest_ptr^ [ low_pos  + j ] := csp_unicode_mark;
            dest_ptr^ [ high_pos + j ] := src_ptr^ [i];
            j := j + 2;
            err_char_no := i;
            i := succ(i);
            END;
        (*ENDWHILE*) 
        IF  i <= src_len
        THEN
            BEGIN
            rc := uni_dest_too_short;
            err_char_no := i;
            END;
        (*ENDIF*) 
        dest_len := j
        END
    ELSE
        BEGIN
        IF  (src_codeset = csp_unicode)
        THEN
            BEGIN
            low_pos  := 1;
            high_pos := 0;
            END
        ELSE
            BEGIN
            low_pos  := 0;
            high_pos := 1;
            END;
        (*ENDIF*) 
        i := 2;
        j := 1;
        WHILE (i <= src_len) AND (j <= dest_len)
              AND ((rc = uni_ok) OR (rc = uni_translate_warning)) DO
            BEGIN
            IF  src_ptr^ [ i-low_pos ] = csp_unicode_mark
            THEN
                dest_ptr^[ j ] := src_ptr^ [ i-high_pos ]
            ELSE
                BEGIN
                IF  rc = uni_ok
                THEN
                    err_char_no := i - 1;
                (*ENDIF*) 
                dest_ptr^[ j ] := c_not_trans_mark;
                rc := uni_translate_warning;
                END;
            (*ENDIF*) 
            i := i + 2;
            j := succ(j);
            END;
        (*ENDWHILE*) 
        IF  (i <= src_len) AND (rc = uni_ok)
        THEN
            BEGIN
            rc := uni_dest_too_short;
            err_char_no := i - 1;
            END;
        (*ENDIF*) 
        dest_len := pred(j);
        END;
    (*ENDIF*) 
(*ENDIF*) 
END;
 
(*------------------------------*) 
 
PROCEDURE
      s80ascii_check (
            src_ptr         : tsp00_MoveObjPtr;
            src_len         : tsp00_Int4;
            src_codeset     : tsp00_Int2;
            dest_ptr        : tsp00_MoveObjPtr;
            VAR dest_len    : tsp00_Int4;
            VAR rc          : tsp8_uni_error;
            VAR err_char_no : tsp00_Int4);
 
BEGIN
err_char_no := 1;
rc := uni_no_such_encoding;
END;
 
(*------------------------------*) 
 
FUNCTION
      s80uni_get_srcpos (
            dest_pos        : tsp00_Int4;
            src_ptr         : tsp00_MoveObjPtr;
            src_len         : tsp00_Int4;
            src_codeset     : tsp00_Int2;
            VAR rc          : tsp8_uni_error;
            VAR err_char_no : tsp00_Int4) : tsp00_Int4;
 
BEGIN
err_char_no := 1;
rc          := uni_ok;
IF  src_codeset = csp_ascii
THEN
    s80uni_get_srcpos := dest_pos DIV 2
ELSE
    rc := uni_no_such_encoding
(*ENDIF*) 
END;
 
(*------------------------------*) 
 
FUNCTION
      s80uni_chartype(
            str_ptr         : tsp00_MoveObjPtr;
            VAR str_len     : tsp00_Int4;
            str_codeset     : tsp00_Int2): tsp8_uni_error;
 
BEGIN
s80uni_chartype := uni_not_translatable;
IF  (str_codeset < csp_unicode_swap)
THEN
    s80uni_chartype := uni_is_singlebyte
ELSE
    IF  (str_codeset = csp_unicode) OR (str_codeset = csp_unicode_swap)
    THEN
        s80uni_chartype := uni_is_multibyte
    ELSE
        s80uni_chartype := uni_no_such_encoding;
    (*ENDIF*) 
(*ENDIF*) 
END;
 
(*------------------------------*) 
 
PROCEDURE
      s80uni_charlayout(
            src_ptr         : tsp00_MoveObjPtr;
            src_len         : tsp00_Int4;
            src_codeset     : tsp00_Int2;
            dest_ptr        : tsp00_MoveObjPtr;
            VAR rc          : tsp8_uni_error;
            VAR err_char_no : tsp00_Int4);
 
BEGIN
(* writeln; *)
rc := uni_no_such_encoding;
END;
 
(*------------------------------*) 
 
PROCEDURE
      s80uni_error (
            rc      : tsp8_uni_error;
            VAR msg : tsp00_C40);
 
BEGIN
CASE rc OF
    uni_ok:
        msg := 'uni_error: OK                           ';
    uni_no_such_encoding:
        msg := 'uni_error: no such encoding             ';
    uni_not_translatable:
        msg := 'uni_error: not translatable             ';
    uni_dest_too_short:
        msg := 'uni_error: dest too short               ';
    uni_src_too_short:
        msg := 'uni_error: src too short                ';
    uni_is_singlebyte:
        msg := 'uni_error: character is singlebyte      ';
    uni_is_multibyte:
        msg := 'uni_error: character is multibyte       ';
    uni_is_incomplete:
        msg := 'uni_error: character is incomplete      ';
    uni_translate_warning:
        msg := 'uni_error: translate warning            ';
    OTHERWISE
        msg := 'unknown uni_error received              '
    END
(*ENDCASE*) 
END;
 
.CM *-END-* code ----------------------------------------
.SP 2 
***********************************************************
*-PRETTY-*  statements    :        122
*-PRETTY-*  lines of code :        398        PRETTYX 3.10 
*-PRETTY-*  lines in file :        774         1997-12-10 
.PA 
