/*
 * addr_ext.h
 * ----------
 *
 * Constants and prototypes for address comparison.
 *
 * Copyright (c):
 * 2007-2008:  Joerg MICHAEL, Adalbert-Stifter-Str. 11, 30655 Hannover, Germany
 *
 * SCCS: @(#) addr_ext.h  1.2  2008-11-30
 *
 * This file is subject to the GNU Lesser General Public License (LGPL)
 * (formerly known as GNU Library General Public Licence)
 * as published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this file; if not, write to the
 * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Actually, the LGPL is __less__ restrictive than the better known GNU General
 * Public License (GPL). See the GNU Library General Public License or the file
 * LIB_GPLA.TXT for more details and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * There is one important restriction: If you modify this program in any way
 * (e.g. modify the underlying logic or translate this program into another
 * programming language), you must also release the changes under the terms
 * of the LGPL.
 * (However, since __this__ program is intended to be customized, all changes
 * covered by TO-DO comments are free.)
 *
 * That means you have to give out the source code to your changes,
 * and a very good way to do so is mailing them to the address given below.
 * I think this is the best way to promote further development and use
 * of this software.
 *
 * If you have any remarks, feel free to e-mail to:
 *     ct@ct.heise.de
 *
 * The author's email address is:
 *    astro.joerg@googlemail.com
 */


#ifndef _ADDR_EXT_INCLUDED_

#define _ADDR_EXT_INCLUDED_


/****  char set = iso8859-1  ****/


/****  TO-DO: users outside Germany must change the following macro  ****/
#define DEFAULT_COUNTRY      "D"

/****  TO-DO:  If you don't want to use "gender.c"  ****/
/****  (available from www.heise.de/ct/07/17/182),  ****/
/****  de-activate the following macro:             ****/
  #define USE_GENDER

/****  TO-DO: If you don't want to use "phonet.c"   ****/
/****  (available from www.heise.de/ct/99/25/252),  ****/
/****  de-activate the following macro:             ****/
  #define USE_PHONET



/****  Phonetic conversion: the real "phonet" interface if      ****/
/****  USE_PHONET is active, otherwise the stub macros below.   ****/
#ifndef USE_PHONET
   /****  stub: "phonet" is replaced by a call to "up_expand"  ****/
   /****  (the argument "mode_country" is dropped)             ****/
   #define phonet(src,dest,len,mode_country)        \
                         up_expand(src,dest,len,COMPRESS_MULTIPLE_CHARS)

   #define PHONET_FIRST_RULES     0
   #define PHONET_NO_LANGUAGE     0
   #define PHONET_GERMAN          0
   #define PHONET_MODE            0
#else
   #include "ph_ext.h"
   #undef PHONET_EXECUTABLE

   /****  TO-DO: non-German users must change the following macro:  ****/
   #define PHONET_MODE   (PHONET_FIRST_RULES+PHONET_GERMAN)
#endif

/****  identical with and without "phonet"  ****/
#define PHONET_SHRINK_FACTOR  0.75

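/****  The macro "SHRINK_FACTOR_PH" covers the       ****/
/****  information loss due to phonetic conversion   ****/
/****  NOTE(review): this header itself only defines ****/
/****  "PHONET_SHRINK_FACTOR"; please confirm which  ****/
/****  of the two names the sources actually use     ****/
/****  before activating one of the macros below.    ****/
/****  TO-DO:  If you use the soundex algorithm,     ****/
/****  activate the following macro:                 ****/
   /****   #define SHRINK_FACTOR_PH  0.20   ****/
/****  TO-DO:  If you use any other wide phonetic    ****/
/****  transcription, activate the following macro:  ****/
   /****   #define SHRINK_FACTOR_PH  0.50   ****/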



#ifdef USE_GENDER
   #include "gen_ext.h"
   #undef GENDER_EXECUTABLE

   /****  TO-DO: change the following macros, if necessary:  ****/
   #define GENDER_MODE             0
   #define GENDER_COUNTRY     GC_ANY_COUNTRY

#else
   #define get_gender(first_name,compare_mode,country)          0
   #define check_nickname(name_1,name_2,compare_mode,country)   0

   #define GENDER_MODE             0
   #define GENDER_ALLOW_COUPLE     0
   #define GENDER_NO_EXT_CONV      0

   #define GENDER_COUNTRY          0
   #define GC_GERMANY              0
   #define GC_AUSTRIA              0
   #define GC_SWISS                0
   #define GC_ANY_COUNTRY          0

   #define IS_FEMALE              'F'
   #define IS_MOSTLY_FEMALE       'f'
   #define IS_MALE                'M'
   #define IS_MOSTLY_MALE         'm'
   #define IS_UNISEX_NAME         '?'
   #define IS_COUPLE              'C'

   #define EQUIVALENT_NAMES       '='
   #define NOT_EQUAL_NAMES        '!'
   #define NAME_NOT_FOUND         ' '
   #define ERROR_IN_NAME          'E'
   #define INTERNAL_ERROR_GENDER  'I'
#endif
#define IS_COMPANY             'C'


/****  Field lengths (without '\0')     ****/
/****  TO-DO: Change them if necessary  ****/

/****  "gen_ext.h" may already have defined LENGTH_FIRST_NAME;  ****/
/****  supply the default only if it did not                    ****/
#ifndef LENGTH_FIRST_NAME
   #define LENGTH_FIRST_NAME   40
#endif

/****  names and postal address  ****/
#define LENGTH_FAM_NAME        50
#define LENGTH_C_O_NAME        40
#define LENGTH_STREET          40
#define LENGTH_ZIP_CODE        10
#define LENGTH_CITY            40
#define LENGTH_COUNTRY          4

/****  contact data  ****/
#define LENGTH_PHONE_NUMBER    20
#define LENGTH_MOBILE_NUMBER   20
#define LENGTH_EMAIL_ADDR      50

/****  account and customer data  ****/
#define LENGTH_IBAN_CODE       20
#define LENGTH_BANK_ACCOUNT    20
#define LENGTH_CUST_NUMBER     20

#define LENGTH_INTERNAL_VAR    50


/****  Minimum match (percent) for family member ****/
/****  TO-DO: Change this macro if necessary     ****/
#define MIN_IS_FAMILY_MEMBER   90

/****  We assume year of birth to be given as "yyyy".          ****/
/****  TO-DO: If it is "yy", change the following macro to "2" ****/
#define LENGTH_BIRTH_YEAR      4

/****  TO-DO: Change the following macros if necessary  ****/
#define DUMMY_BIRTHDAY    "0000-00-00"
#define UNKNOWN_STREET    "unbekannt"


/****  Weights (= hundredths of points) for the             ****/
/****  address comparison (function "compare_addr")         ****/
/****  TO-DO: Re-calculate weights, if necessary,           ****/
/****         with the program "demo":                      ****/
/****     demo -calculate_weight_from_file  <sorted_file>   ****/
/****  NOTE: This is strongly recommended for year of birth ****/
/****        ( = WEIGHT_B_YEAR)                             ****/
/****  NOTE: The compile-time checks near the end of this   ****/
/****        file reject WEIGHT_GENDER outside 350..600     ****/
/****        and WEIGHT_COUNTRY below 250.                  ****/
#define WEIGHT_GENDER          400
#define WEIGHT_COUNTRY         300
#define WEIGHT_B_DAY          1480
#define WEIGHT_B_MONTH        1070
/****  TO-DO: If you have a very narrow birthday distribution    ****/
/****  (e.g. students), set the following macro to (e.g.) 700 :  ****/
#define WEIGHT_B_YEAR         1420

/****  Identifying fields all carry the same weight.             ****/
/****  NOTE(review): there is no weight for the IBAN code        ****/
/****  although LENGTH_IBAN_CODE and IS_IBAN_CODE exist -        ****/
/****  confirm that this is intended.                            ****/
#define WEIGHT_PHONE_NUMBER   5000
#define WEIGHT_MOBILE_NUMBER  5000
#define WEIGHT_EMAIL_ADDR     5000
#define WEIGHT_BANK_ACCOUNT   5000
#define WEIGHT_CUST_NUMBER    5000

/****  Default weights (= hundredths of points) for the     ****/
/****  address comparison (function "compare_addr").        ****/
/****  Note:  These weights are defaults for "any country"; ****/
/****         they fill the "*" row of the table below.     ****/
/****  NOTE: If you have addresses from different countries ****/
/****        in your database, you should calculate the     ****/
/****        weight separately for each (major) country     ****/
/****        and include the values in the array            ****/
/****        "WEIGHTS_ADDR_COMPARE" (also in this file)     ****/
#define AC_WEIGHT_FIRST_NAME   1650
#define AC_WEIGHT_FAM_NAME     1900
#define AC_WEIGHT_STREET       3700
#define AC_WEIGHT_ZIP_CODE     2100
#define AC_WEIGHT_CITY         1200


/****  Per-country weights (= hundredths of points) for     ****/
/****  the address comparison (function "compare_addr").    ****/
/****  Each row initializes the following structure:        ****/
/****          char *country;                               ****/
/****          int w_first_name;                            ****/
/****          int w_fam_name;                              ****/
/****          int w_street;                                ****/
/****          int w_zip_code;                              ****/
/****          int w_city;                                  ****/
/****  The row with country == NULL terminates the table.   ****/
/****  TO-DO: Change these constants  or                    ****/
/****         add new ones, if necessary                    ****/

#define WEIGHTS_ADDR_COMPARE                                      \
  {                                                               \
    { "D",   1800, 2070, 3740, 2870, 1320 },                      \
    { "A",   1470, 2500, 3700, 1550,  700 },                      \
    { "CH",  1780, 2500, 3700, 1900, 1230 },                      \
    { "DK",  1780, 1300, 3700, 1750,  900 },                      \
    { "S",   1640, 1300, 3700, 1750, 1100 },                      \
    { "N",   1770, 1300, 3700, 1750,  950 },                      \
    { "GB",  1500, 2100, 3300, 4500, 1200 },                      \
    { "USA", 1740, 1850, 4500, 2500, 1270 },                      \
    { "F",   1340, 2500, 3700, 2600, 1380 },                      \
    { "E",   1000, 1700, 3700, 2200, 1070 },                      \
    { "P",   1000, 1700, 3700, 1600, 1070 },                      \
    { "I",   1220, 2300, 3700, 2200, 1300 },                      \
    { "*",   AC_WEIGHT_FIRST_NAME, AC_WEIGHT_FAM_NAME,            \
             AC_WEIGHT_STREET, AC_WEIGHT_ZIP_CODE,                \
             AC_WEIGHT_CITY },                                    \
    { NULL,  0, 0, 0, 0, 0 }                                      \
  }


/****  TO-DO:  change the following structure, if necessary.     ****/
/****  Note: You must also change the corresponding function     ****/
/****  "copy_db_fields_to_search_result" in program "dbselect.c" ****/
/****  Note: Do _NOT_ define pointers. This area is for storage. ****/

/****  One search hit; all members are stored inline (no pointers). ****/
/****  NOTE(review): "matchcode" and "points" are presumably        ****/
/****  filled in by the code in "dbselect.c" - confirm there.       ****/
struct DB_SEARCH_RESULT
{
   long matchcode;
   int  points;

   /****  character buffers: field length plus one byte for '\0'  ****/
   char first_name    [LENGTH_FIRST_NAME + 1];
   char fam_name      [LENGTH_FAM_NAME + 1];
   char city          [LENGTH_CITY + 1];
   char full_birthday [10 + 1];      /****  10 = length of "yyyy-mm-dd"-like dates, cf. DUMMY_BIRTHDAY  ****/
   char cust_number   [LENGTH_CUST_NUMBER + 1];
};




/************************************************************/
/****  do not change the following macros and definitions  **/
/************************************************************/

/****  Negative status codes, listed in numerical order.         ****/
/****  The values are parenthesized so that they stay intact     ****/
/****  inside any surrounding expression.                        ****/
/****  NOTE(review): presumably returned by the functions that   ****/
/****  are declared in this header - confirm in the sources.     ****/
#define ADDR_INSUFFICIENT_SEL_CRITERIA  (-10)
#define ADDR_TOO_MANY_MATCHES_FOUND     (-20)

#define ADDR_CANNOT_CREATE_FILE         (-30)
#define ADDR_CANNOT_READ_FILE           (-31)

#define ADDR_SQL_DECLARE_ERROR          (-41)
#define ADDR_SQL_FETCH_ERROR            (-42)
#define ADDR_SQL_CLOSE_ERROR            (-43)

#define ADDR_INTERNAL_ERROR             (-50)

/****  we assume 1 byte chars  ****/
#define HASH_COUNT             256

#define CONV_STRINGS_ARE_INITIALIZED   1
#define MAX_POINTS_FIRST_NAME  2500
#define IS_FAMILY_MEMBER        10
#define MAX_POINTS             100

/****  bit masks for "run_mode" (do not change)          ****/
/****  Written in hex so that each single bit is visible ****/
#define COMPARE_NORMAL            0x0000
#define DATABASE_SELECT           0x0001
#define DB_WILDCARDS_FOR_LIKE     0x0002
#define SEARCH_FAMILY_MEMBERS     0x0004
#define ACCEPT_SIMILAR_BIRTHDAYS  0x0008
#define COMPARE_LANGUAGE_GERMAN   0x0010
#define DO_UNWEIGHTED_COMPARISON  0x0020
#define SKIP_BLANKCUT             0x0040
#define SKIP_UPEXPAND             0x0080

/****  additional bits; the LEV_... names without a number  ****/
/****  are aliases of the "run_mode" bits above             ****/
#define COMPRESS_MULTIPLE_CHARS   0x0100
#define LEV_COMPARE_NUMBER        0x0200
#define LEV_COMPARE_NORMAL        COMPARE_NORMAL
#define LEV_COMPARE_GERMAN        COMPARE_LANGUAGE_GERMAN
#define LEV_SKIP_UPEXPAND         SKIP_UPEXPAND

/****  trace switches  ****/
#define TRACE_ADDR                0x0400
#define TRACE_ERRORS              0x0800
#define TRACE_LEV                 0x1000

/****  bit masks for "isxletter" (do not change)           ****/
/****  These are the bits tested by the IS_LOWER() ...     ****/
/****  IS_XLETTER() macros further down in this file.      ****/
#define _IS_LOWER_       0x01
#define _IS_UPPER_       0x02
#define _IS_UMLAUT_      0x04
#define _IS_SORTCHAR_    0x08
#define _IS_SORTCHAR2_   0x10
#define _IS_DIGIT_       0x20

/****  macros for wildcards (do not change)  ****/
/****  '*' and '?' are the wildcards of the search strings;       ****/
/****  '%' and '_' are the wildcard characters of SQL "LIKE".     ****/
/****  NOTE(review): the translation between the two pairs is     ****/
/****  presumably controlled by DB_WILDCARDS_FOR_LIKE - confirm.  ****/
#define MATCHES_ANY_STRING   '*'
#define MATCHES_ANY_CHAR     '?'
#define DB_LIKE_ANY_STRING   '%'
#define DB_LIKE_ANY_CHAR     '_'

/****  identifier for database fields (do not change)  ****/
/****  Consecutive values 1 .. 22.                                ****/
/****  NOTE(review): presumably stored in the "info" member of    ****/
/****  "struct MAIL_ADDR" - confirm in the sources.               ****/
#define IS_GENDER               1
#define IS_FIRST_NAME           2
#define IS_FIRST_NAME_PHONET    3
#define IS_FAM_NAME             4
#define IS_FAM_NAME_PHONET      5
#define IS_C_O_NAME             6
#define IS_STREET               7
#define IS_STREET_PHONET        8
#define IS_ZIP_CODE             9
#define IS_CITY                10
#define IS_COUNTRY             11
#define IS_FULL_BIRTHDAY       12
#define IS_DATE_FORMAT         13
#define IS_BIRTH_DAY           14
#define IS_BIRTH_MONTH         15
#define IS_BIRTH_YEAR          16
#define IS_PHONE_NUMBER        17
#define IS_MOBILE_NUMBER       18
#define IS_EMAIL_ADDR          19
#define IS_CUST_NUMBER         20
#define IS_IBAN_CODE           21
#define IS_BANK_ACCOUNT        22

/****  Fixed and derived field lengths (without '\0')             ****/
#define LENGTH_GENDER           1
#define LENGTH_FULL_BIRTHDAY   10
#define LENGTH_DATE_FORMAT     10
#define LENGTH_BIRTH_DAY        2
#define LENGTH_BIRTH_MONTH      2
/****  phonetic variants are as long as the original fields  ****/
#define LENGTH_FIRST_NAME_PHONET   LENGTH_FIRST_NAME
#define LENGTH_FAM_NAME_PHONET   LENGTH_FAM_NAME
#define LENGTH_STREET_PHONET   LENGTH_STREET
/****  first name + family name + 1 extra character               ****/
/****  (presumably a separating blank - confirm in the sources)   ****/
#define LENGTH_WHOLE_NAME  (LENGTH_FIRST_NAME+LENGTH_FAM_NAME+1)


/****  Stop the build early if the optional headers are missing.  ****/
/****  GC_ANY_COUNTRY is either a stub macro of this file         ****/
/****  (USE_GENDER inactive) or expected from "gen_ext.h".        ****/
#ifndef GC_ANY_COUNTRY
  #error "Please add files from 'gender.zip' to your project (or deactivate USE_GENDER)"
#endif

/****  PHONET_FIRST_RULES is either a stub macro of this file     ****/
/****  (USE_PHONET inactive) or expected from "ph_ext.h".         ****/
#ifndef PHONET_FIRST_RULES
  #error "Please add files from 'phonet.zip' to your project (or deactivate USE_PHONET)"
#endif

/****  Compile-time sanity checks for the user-adjustable macros.  ****/
/****  Every "#error" text names exactly the violated condition.   ****/

/****  WEIGHT_GENDER must lie within 350 .. 600  ****/
#if (WEIGHT_GENDER < 350)
  #error "WEIGHT_GENDER < 350  is not allowed"
#endif

#if (WEIGHT_GENDER > 600)
  #error "WEIGHT_GENDER > 600  is not allowed"
#endif

#if (WEIGHT_COUNTRY < 250)
  #error "WEIGHT_COUNTRY < 250  is not allowed"
#endif

/****  none of the following fields may be longer than  ****/
/****  LENGTH_WHOLE_NAME                                ****/
#if (LENGTH_C_O_NAME > LENGTH_WHOLE_NAME)
  #error "LENGTH_C_O_NAME > LENGTH_WHOLE_NAME  is not allowed"
#endif

#if (LENGTH_STREET > LENGTH_WHOLE_NAME)
  #error "LENGTH_STREET > LENGTH_WHOLE_NAME  is not allowed"
#endif

#if (LENGTH_ZIP_CODE > LENGTH_WHOLE_NAME)
  #error "LENGTH_ZIP_CODE > LENGTH_WHOLE_NAME  is not allowed"
#endif

#if (LENGTH_COUNTRY > LENGTH_WHOLE_NAME)
  #error "LENGTH_COUNTRY > LENGTH_WHOLE_NAME  is not allowed"
#endif

#if (LENGTH_CITY > LENGTH_WHOLE_NAME)
  #error "LENGTH_CITY > LENGTH_WHOLE_NAME  is not allowed"
#endif

/****  minimum sizes  ****/
#if (LENGTH_CUST_NUMBER < 5)
  #error "LENGTH_CUST_NUMBER < 5  is not allowed"
#endif

#if (LENGTH_INTERNAL_VAR < 30)
  #error "LENGTH_INTERNAL_VAR < 30  is not allowed"
#endif


/****  One element of an address: a text plus an integer tag.     ****/
/****  Addresses are passed around as arrays of this structure    ****/
/****  (see "format_mail_addr" and "compare_addr").               ****/
/****  NOTE(review): "info" presumably holds one of the field     ****/
/****  identifiers IS_GENDER ... IS_BANK_ACCOUNT - confirm.       ****/
struct MAIL_ADDR
{
   char *text;
   int   info;
};

/****  Result of the "lev_..." comparison functions and of        ****/
/****  "calculate_limit"; it is returned by value.                ****/
/****  NOTE(review): the exact meaning of "diff" and              ****/
/****  "empty_diff" is not visible in this header.                ****/
struct LEV_RESULT
{
   int points;
   int max_points;
   int diff;
   int empty_diff;
};


/****  Note:  Use of the following macros and external variables ****/
/****         requires proper initialization                     ****/
/****         (calling the function "initialize_conv_strings")   ****/
/****  Each macro looks up "c" in the table "isxletter" and      ****/
/****  masks the class bits (1 = lower, 2 = upper, 4 = umlaut,   ****/
/****  8 = sortchar, 16 = sortchar2, 32 = digit).  The result    ****/
/****  is zero or non-zero, not necessarily 1.                   ****/
/****  The argument is parenthesized so that the cast to         ****/
/****  "unsigned char" applies to the whole expression, e.g.     ****/
/****  IS_LOWER(x >> 8), and not only to its first operand.      ****/
#define IS_LOWER(c)     (isxletter [(unsigned char) (c)] & 1)
#define IS_UPPER(c)     (isxletter [(unsigned char) (c)] & 2)
#define IS_UMLAUT(c)    (isxletter [(unsigned char) (c)] & 4)
#define IS_LETTER(c)    (isxletter [(unsigned char) (c)] & (1+2+4))
#define IS_SORTCHAR(c)  (isxletter [(unsigned char) (c)] & 8)
#define IS_SORTCHAR2(c) (isxletter [(unsigned char) (c)] & 16)
#define IS_DIGIT(c)     (isxletter [(unsigned char) (c)] & 32)
#define IS_XLETTER(c)   (isxletter [(unsigned char) (c)] & (1+2+4+32))

/****  The translation unit that defines these variables sets     ****/
/****  "__IS_LEV100_C__" and therefore skips the declarations.    ****/
#ifndef __IS_LEV100_C__
   /****  declare external variables from "lev100ph.c"  ****/
   extern int  conv_strings_initialized;

   /****  character tables with one entry per "unsigned char"  ****/
   extern char up_and_conv[HASH_COUNT];
   extern char sortchar [HASH_COUNT];
   extern char sortchar2[HASH_COUNT];
   extern char upperchar[HASH_COUNT];
   extern char lowerchar[HASH_COUNT];

   /****  class bits tested by IS_LOWER() ... IS_XLETTER()  ****/
   extern int  isxletter[HASH_COUNT];
#endif



/********************************************************/
/****  function prototypes from "lev100ph.c"  *************/
/********************************************************/

/****  Must be called before the IS_...() macros or the external  ****/
/****  conversion tables are used.                                ****/
int initialize_conv_strings (int run_mode);

void print_number (char *text,
                   int number,
                   int mode);

struct LEV_RESULT calculate_limit (char pattern[],
                                   char text[],
                                   int run_mode);

/****  Note the argument order: "up_conv" takes (dest, src),  ****/
/****  "up_expand" takes (src, dest).                         ****/
int up_conv (char dest[],
             char src[],
             int len);

int up_expand (char src[],
               char dest[],
               int len,
               int run_mode);

/****  The comparison functions return their result by value.  ****/
struct LEV_RESULT lev_x (char *pattern,
                         char *text,
                         char *desc,
                         int max_points,
                         int run_mode);

struct LEV_RESULT lev_zipcode_city (char *zip_code1,
                                    char *zip_code2,
                                    char *city1,
                                    char *city2,
                                    char *desc,
                                    int max_points,
                                    int max_points_city,
                                    int run_mode);

struct LEV_RESULT lev_2_ph (char *a_text,
                            char *a2_text,
                            char *a_text_ph,
                            char *a2_text_ph,
                            char *desc,
                            int max_points,
                            int run_mode);


/********************************************************/
/****  function prototypes from "addr.c"  ***************/
/********************************************************/

/****  string helpers; "StrNCpy" takes (dest, src, len)  ****/
void StrNCpy (char dest[],
              char src[],
              int len);

void blank_cut (char s[],
                int len);

/****  name helpers  ****/
int check_valid_first_name (char name[]);

int delete_suffix_jun (char a_name[],
                       char a2_name[]);

/****  Note the argument order: (src, dest)  ****/
void format_street (char src[],
                    char dest[],
                    int run_mode);

/****  NOTE(review): presumably "full_birthday" is the input and  ****/
/****  the other four arrays receive the parts - confirm in       ****/
/****  "addr.c".                                                  ****/
void split_birthday (char full_birthday[],
                     char date_format[],
                     char day[],
                     char month[],
                     char year[]);

/****  Addresses are passed as arrays of "struct MAIL_ADDR".  ****/
void format_mail_addr (struct MAIL_ADDR x_addr[],
                       int run_mode);

double calculate_ln (double d);

void cleanup_addr (void);

/****  "run_mode" takes the bit masks defined above  ****/
/****  (COMPARE_NORMAL, DATABASE_SELECT, ...).       ****/
int compare_addr (struct MAIL_ADDR a_addr[],
                  struct MAIL_ADDR b_addr[],
                  int min_points,
                  int run_mode);



/********************************************************/
/****  function prototypes from "dbselect.c"  ***********/
/********************************************************/

/****  NOTE(review): presumably "text1" ... "text4" receive the   ****/
/****  generated search strings, each "len" characters at most -  ****/
/****  confirm in "dbselect.c".                                   ****/
int create_search_strings (char first_name[],
                           char fam_name[],
                           char text1[],
                           char text2[],
                           char text3[],
                           char text4[],
                           int len,
                           int min_pos_first_wildcard,
                           int run_mode);

int check_search_field (char *field,
                        int size_of_string,
                        int min_pos_first_wildcard,
                        int run_mode);

int check_lengths_of_search_strings (int run_mode);

/****  "search_results" is an array of pointers, "storage_area"   ****/
/****  an array of structures.                                    ****/
/****  NOTE(review): presumably the pointers refer to elements    ****/
/****  of "storage_area" and "max_useful_found" limits their      ****/
/****  number - confirm in "dbselect.c".                          ****/
int database_select (struct MAIL_ADDR search_addr[],
                     int min_points,
                     struct DB_SEARCH_RESULT *search_results[],
                     struct DB_SEARCH_RESULT storage_area[],
                     int max_useful_found,
                     int run_mode);


/********************************************************/
/****  function prototypes from "dedupl.c"  *************/
/********************************************************/

/****  "file_list" is an array of strings; "file_count" is        ****/
/****  presumably its number of entries - confirm in "dedupl.c".  ****/
int do_unload (int file_count,
               char *file_list[],
               int run_mode);

int search_duplicates (char *unload_file,
                       char *dest_file,
                       int min_points,
                       int run_mode);

#endif

/********************************************************/
/****  end of file "addr_ext.h"  ************************/
/********************************************************/
