/* $Id: Lexicon.cpp 1215 2006-10-17 08:10:24Z olau $ */

/* 
   Copyright (C) 2006 Oliver Lau <ola@ctmagazin.de>,
                      Ingmar Thilo
   Copyright (C) 2006 Heise Zeitschriften Verlag.
   Alle Rechte vorbehalten.
*/

#include "Lexicon.h"

using namespace std;


/**
 * Insert a NUL-terminated word into the lexicon.
 *
 * The lexicon is a binary trie kept in the flat array `lex`. A node is a pair
 * of adjacent slots: `lex[n]` is followed for a 0 bit and `lex[n + 1]` for a
 * 1 bit; a slot value of 0 means "no successor yet". Every character
 * contributes eight bits, least significant bit first. The node reached after
 * the eighth bit of a character gets one extra slot directly in front of it
 * (`lex[n - 1]`) which holds the flags of the word ending there.
 *
 * If there is no room left for another node, the word is dropped without
 * being marked, so a truncated prefix is never reported as a word.
 * An empty word is ignored.
 *
 * NOTE(review): the capacity check assumes `lex` holds MAX_LEX_SIZE entries
 * (the declaration is not in this file) -- confirm against Lexicon.h.
 *
 * @param word  NUL-terminated word to insert.
 */
void Lexicon::add(const char *word)
{
  int node = 0; // index of the current node's 0-bit slot
  for (int pos = 0; word[pos] != 0; ++pos)
  {
    if (node >= MAX_LEX_SIZE)
      return; // defensive: never index past the table
    const int ch = (int) word[pos];
#ifdef DEBUG
    std::cout << std::endl << word[pos] << std::endl;
#endif
    for (int mask = 1; mask < 256; mask *= 2)
    {
      const int bit = (ch & mask) ? 1 : 0;
#ifdef DEBUG
      std::cout << bit << " lex[" << node << "+" << bit << "]";
#endif
      if (lex[node + bit] == 0)
      {
        // A new node needs two slots; after the last bit of a character a
        // third slot is reserved in front of it for the flags.
        const int next = idx + ((mask == 128) ? 3 : 2);
        if (next + 1 >= MAX_LEX_SIZE)
          return; // table full: give up instead of writing out of bounds
        idx = next;
        lex[node + bit] = idx;
#ifdef DEBUG
        std::cout << " = " << idx << endl;
#endif
        node = idx;
      }
      else
      {
        node = lex[node + bit];
#ifdef DEBUG
        // Trace output only; this used to be printed in release builds too.
        std::cout << " hopping " << endl;
#endif
      }
    }
  }
  if (node > 0)
  {
    // Mark the flag slot in front of the final node.
    lex[node - 1] |= END_OF_WORD;
#ifdef DEBUG
    std::cout << "END_OF_WORD @ [" << (node - 1) << "]" << std::endl;
#endif
  }
}


/**
 * Read all words from a stream and insert each of them into the lexicon.
 *
 * A word is a maximal run of characters accepted by is_char(); every other
 * character acts as a separator. Reading stops as soon as the stream can no
 * longer deliver a character (end of input or any error state), so a stream
 * that is already in a failed state is simply ignored.
 *
 * Words longer than MAX_WORD_LENGTH - 1 characters do not fit into the local
 * buffer and are inserted as several consecutive pieces.
 *
 * @param input  stream to read the word list from.
 */
void Lexicon::add(istream &input)
{
  char word[MAX_WORD_LENGTH];
  int len = 0;
  char ch;
  // Testing the result of get() keeps a failed read from re-using a stale
  // (or uninitialised) character.
  while (input.get(ch))
  {
    const bool word_char = is_char((int) ch);
    // Flush the collected word on a separator, or when the buffer has only
    // room left for the terminating NUL.
    if (len > 0 && (!word_char || len == MAX_WORD_LENGTH - 1))
    {
      word[len] = 0;
      add(word);
      len = 0;
    }
    if (word_char)
      word[len++] = ch;
  }
  // Input ended in the middle of a word: add what has been collected.
  if (len > 0)
  {
    word[len] = 0;
    add(word);
  }
}


/**
 * Look a NUL-terminated word up in the lexicon.
 *
 * The word is followed bit by bit (eight bits per character, least
 * significant bit first) through the trie in `lex`; a slot value of 0 means
 * the path does not exist. The word is present when the slot in front of the
 * final node carries the END_OF_WORD flag.
 *
 * The flag is tested with a bit mask rather than by equality, because the
 * same slot also receives the VISITED flag; an equality test would stop
 * recognising a word once it has been visited.
 *
 * @param word            NUL-terminated word to look up.
 * @param find_only_once  if true, a word is reported only the first time it
 *                        is found: the hit sets VISITED on the word and every
 *                        later call with this option returns false for it.
 * @return true if the word is in the lexicon (and, with find_only_once, has
 *         not been reported before); false otherwise, including for an
 *         empty word.
 */
bool Lexicon::contains(const char *word, bool find_only_once)
{
  int node = 0; // index of the current node's 0-bit slot
  for (int pos = 0; word[pos] != 0; ++pos)
  {
    if (node >= MAX_LEX_SIZE)
      return false; // defensive: never index past the table
    const int ch = (int) word[pos];
    for (int mask = 1; mask < 256; mask *= 2)
    {
      const int bit = (ch & mask) ? 1 : 0;
      const int next = lex[node + bit];
      if (next == 0)
        return false; // no such path, no need to scan the rest of the word
      node = next;
    }
  }
  if (node == 0)
    return false; // empty word
  if ((lex[node - 1] & END_OF_WORD) == 0)
    return false; // only a prefix of a stored word
  if (find_only_once)
  {
    if (lex[node - 1] & VISITED)
      return false;
    lex[node - 1] |= VISITED;
  }
  return true;
}


/**
 * Read words from `input` and write every word that is found in the lexicon
 * to `output`, one per line.
 *
 * Words are obtained with readword() and tested with contains(), using that
 * function's default for `find_only_once` (declared outside this file).
 *
 * The loop runs while the stream is good() instead of merely "not at EOF":
 * a stream that is in a failed state without having reached EOF (e.g. a file
 * that could not be opened) would otherwise never terminate the loop.
 *
 * @param input   stream to read candidate words from.
 * @param output  stream receiving the words that were found.
 * @return number of words written to `output`.
 */
int Lexicon::cross(istream &input, ostream &output)
{
  char word[MAX_WORD_LENGTH];
  int num_words = 0;
  while (input.good())
  {
    // A word completed by hitting end of input is still checked here; the
    // loop condition only ends the loop afterwards.
    if (readword(word, input) > 0 && this->contains(word))
    {
      output << word << endl;
      ++num_words;
    }
  }
  return num_words;
}


/**
 * Count how many entries of `wordlist` are found in the lexicon.
 *
 * Each entry is passed to contains() with that function's default for
 * `find_only_once` (declared outside this file).
 *
 * @param wordlist  NUL-terminated words to look up.
 * @return number of entries for which contains() returned true.
 */
int Lexicon::cross(vector<char *> &wordlist)
{
  int hits = 0;
  for (vector<char *>::iterator it = wordlist.begin(); it != wordlist.end(); ++it)
  {
    if (this->contains(*it))
      ++hits;
  }
  return hits;
}


/**
 * Write every entry of `wordlist` that is found in the lexicon to `output`,
 * one per line, and count them.
 *
 * Each entry is passed to contains() with that function's default for
 * `find_only_once` (declared outside this file).
 *
 * @param wordlist  NUL-terminated words to look up.
 * @param output    stream receiving the words that were found.
 * @return number of words written to `output`.
 */
int Lexicon::cross(vector<char *> &wordlist, ostream &output)
{
  int hits = 0;
  for (vector<char *>::iterator it = wordlist.begin(); it != wordlist.end(); ++it)
  {
    char *candidate = *it;
    if (!this->contains(candidate))
      continue;
    ++hits;
    output << candidate << endl;
  }
  return hits;
}


int Lexicon::readword(char *word, istream &input)
{
  int i = 0;
  if (!input.eof())
  {
    char ch;
    input.read(&ch, sizeof(ch));
    while (is_char((int) ch) && i < MAX_WORD_LENGTH)
    {
      word[i++] = ch;
      input.read(&ch, sizeof(ch));
    }
  }
  word[i] = 0;
  return i;
}
