/***************************************************************************
     name : main.c
   author : Daniel TAUPIN
  purpose : builds and sorts the dictionary of all words in a text file,
            compares them to a dictionary and removes all known words,
            then outputs the list of unknown words.
 *****************************************************************************/

#define SORT_BETWEEN_DICT TRUE

/****************************** includes *************************************/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "dvistatc.h"
#include "version.h"
;

/****************************************************************************/

/********************************* macros ***********************************/

/********************************* global variables *************************/
FILE *fText = NULL;		/* input stream: main() opens the abbreviation files and the examined text through it */
FILE *fStd = NULL;		/* log/diagnostic stream; main() sets it to stdout, or to stderr with -v */
FILE *fUtil = NULL;             /* utility file pointer; not referenced in the code visible in this file */
FILE *fDict = NULL;		/* stream of the dictionary file currently being compared */
FILE *fOut = NULL;		/* stream receiving the final word list (stdout by default) */

char *progname;			/* name of the executable file (argv[0]) */
int Debug = 0;                  /* verbosity level: 0 = quiet, higher values print more traces */


char ErrorMsgText[WRKMAX] = {'\0'};   /* scratch buffer for the most recent diagnostic text */

char InputFileName[WRKMAX] = {'\0'};  /* examined text file (the non-option argument) */
char OutputFileName[WRKMAX] = {'\0'}; /* -o, or derived from InputFileName with suffix ".wls" */
char DictionaryRoot[WRKMAX] = {'\0'}; /* -d: root directory for dictionaries */
char Language[WRKMAX] = {'\0'};       /* -l: subdirectory appended to DictionaryRoot */
char GenericName[WRKMAX] = {'\0'};    /* -f: common tail of every dictionary file name */
char GenAbbrevName[WRKMAX] = {'\0'};  /* -a: general abbreviation file, as typed by the user */
char LocAbbrevName[WRKMAX] = {'\0'};  /* -A: local abbreviation file */
char GenAbbrevFileName[WRKMAX] = {'\0'}; /* resolved path of the general abbreviation file */
char ForbiddenForms[WRKMAX] = {'\0'}; /* -x: stored and traced by main(), not otherwise used in the visible code */
char DictionaryFileRoot[WRKMAX] = {'\0'}; /* "<DictionaryRoot>/<Language>/" built by main() */
char DictionaryFileName[WRKMAX] = {'\0'}; /* DictionaryFileRoot + prefix letter + GenericName */
int  BeginPageNumber = 0;             /* -b: ReadText() ignores words on pages below this */
int  EndPageNumber = 9999;            /* -e: ReadText() ignores words on pages above this */

char *AbbrevTable[MAXABBREVS] = {NULL}; /* NULL-terminated table of abbreviation strings */
int  AbbrevOrd[MAXABBREVS] = {0};       /* sorted order: indices into AbbrevTable */
int  NumAbbrevs = 0;                    /* number of abbreviations */

char *InWordTable[MAXWORDS] = {NULL};   /* NULL-terminated table of words found in the text */
int  InWordOrd[MAXWORDS] = {0};         /* sorted order: indices into InWordTable */
int  InWordFirst[MAXWORDS] = {0};       /* first page on which each word was seen */
int  InWordLast[MAXWORDS] = {0};        /* last page on which each word was seen */
int  InWordRefs[MAXWORDS] = {0};        /* number of occurrences of each word */
int  NumInWord = 0;                     /* number of live words, as returned by SortDictionary() */

int Last_m = 0;                         /* written by CleanFromDictionary() on each removal; never read in the visible code */

/* Sentinel string stored over removed words by RemoveWord(); SortDictionary()
   leaves entries equal to it out of the count it returns.  main() stores a
   terminating '\0' into the last element at start-up. */
char EndAlphabet[4] = {255};
BOOL sort_between_dict = SORT_BETWEEN_DICT; /* when true, main() re-sorts the word table after each dictionary */
/****************************************************************************/
/*** function prototypes ***/
/* NOTE(review): RemoveWord(), error(), Error() and warning() are defined in
   this file but have no prototype here - presumably they are declared in
   dvistatc.h; confirm. */
BOOL PrepareText(char *filename, FILE **f);  /* opens file for reading */
BOOL PrepareStdout(char *filename, FILE **f);  /* opens file for std output */
BOOL PrepareOut(char *filename, FILE **f); /* creates file and writes */

BOOL CloseText(FILE **f);   /* closes an input stream and resets the pointer to NULL */
BOOL CloseOut(FILE **f);    /* not defined in the code visible in this file */

void Message(char *text);      /* writes text to the log stream fStd */
void OutputError(char *text);  /* writes text to fStd, and also to stderr when StdToScreen is false */
void Usage();                  /* prints the command line summary */

int BackToSlash(char *Name);   /* '\\' -> '/' in place, returns number of changes */
int SlashToBack(char *Name);   /* '/' -> '\\' in place, returns number of changes */

BOOL StdToScreen = TRUE;       /* when TRUE, OutputError() writes to fStd only */
int ReadDictionary(FILE **f, char **DicTable, int DicSize, char *DicName);
int SortDictionary(char **DicTable, int DicSize, char *DicName, int *Ordre);
int ReadText(FILE **f, char **DicTable, int DicSize, char *DicName,
    int *FirstRef, int *LastRef, int *NumRefs);
int CleanAbbrevs();            /* removes known abbreviations from the word table */
int CleanPeriods();            /* strips trailing periods from the words */
int CleanUppers();             /* converts the words to lower case */
int CleanDuplicates();         /* merges identical neighbouring words */
int CleanFromDictionary(FILE **f, char DictPrefix); /* removes the words found in one dictionary file */
void WordList();               /* debug listing of the remaining words on fStd */
void WordOut(FILE **f);        /* final listing of the remaining words */
/****************************************************************************/
int main(int argc, char **argv)
/****************************************************************************
purpose: checks parameter and starts analising input text.
params:  command line arguments argc, argv
globals: initializes in- and output-files.
 ****************************************************************************/
{
#ifndef KPATHSEA
#ifndef HAS_GETOPT
  extern int getopt(int, char **, char *);
#endif
  extern char *optarg;
  extern int optind, opterr;
#endif
  int c, errflag = 0;
  char *input = NULL;
  char *output = NULL;
  char *ttt = NULL;
  int removed, LetterIndex; char DictPrefix;

  EndAlphabet[3] = '\0'; /* required to have a \0 at end of this string */
  fStd = stdout;		/* file pointer to LOG file */
  fOut = stdout;		/* file pointer to output file */

  progname=argv[0];
  printf("%s:\n %s\n", progname, Version);

  if (argc <= 1)
  {
    Usage(); return(1);
  }
  while((c = getopt(argc, argv, "vhD:o:d:l:f:a:A:x:b:e:-:")) != EOF)
  {
    switch(c)
    {
      case 'h':
        Usage();
        return(1);
      case 'v':
        Debug = 1;
        fStd = stderr;
        break;
      case 'd':
        sprintf(DictionaryRoot,"%s",optarg);
        break;
      case 'l':
        sprintf(Language,"%s",optarg);
        break;
      case 'f':
        sprintf(GenericName,"%s",optarg);
        break;
      case 'a':
        sprintf(GenAbbrevName,"%s",optarg);
        break;
      case 'A':
        sprintf(LocAbbrevName,"%s",optarg);
        break;
      case 'x':
        sprintf(ForbiddenForms,"%s",optarg);
        break;
      case 'o':
        sprintf(OutputFileName,"%s",optarg);
        break;
      case 'b':
        sscanf(optarg,"%d",&BeginPageNumber);
        break;
      case 'e':
        sscanf(optarg,"%d",&EndPageNumber);
        break;
      case 'D':
        sscanf(optarg,"%d",&Debug);
        fStd = stdout;
        break;
      case '-':
        if(strcmp(optarg,"help") == 0)
        { Usage(); return(1);
        } else
        { errflag = 1;
        }
        break;
      default:
        fprintf     (stderr," Found: %s\n",argv[optind]);
        errflag = 1;
        Usage();
        return(1);
    }
  }
  if(Debug >= 2) fprintf(fStd,"\nargc=%d",argc);
  if(argc != optind + 1)
  {
    fprintf(stderr,"\nToo many or misplaced file name[s]: %s",argv[optind]);
    Usage();
    return(1);
  } else
  {
    sprintf(InputFileName,"%s",argv[optind]);
    if(strlen(OutputFileName) == 0)
    {
      strcpy(OutputFileName,InputFileName);
      if (ttt = strrchr(OutputFileName,'.'))
      {
        ttt[0] = '\0';
      }
      strcat(OutputFileName, ".wls");
      if (strcmp(InputFileName,OutputFileName) == 0)
      {
        strcpy(OutputFileName,InputFileName);
        strcat(OutputFileName, ".wls");
      }
    }
  }
  if(errflag)
  {
    fprintf(stderr,"\n\nError[s] in command line");
    Usage();
    return(1);
  }
  BackToSlash(InputFileName);
  BackToSlash(OutputFileName);
  if(Debug >= 2) fprintf(fStd,"\nInput file name: '%s'",InputFileName);
  if(Debug >= 2) fprintf(fStd,"\nOutput file name: '%s'",OutputFileName);

  BackToSlash(DictionaryRoot);
  BackToSlash(GenAbbrevName);

  if(Debug >= 2) fprintf(fStd,"\nDictionary root: '%s'",DictionaryRoot);
  if(Debug >= 2) fprintf(fStd,"\nLanguage: '%s'",Language);
  if(Debug >= 2) fprintf(fStd,"\nGenericName: '%s'",GenericName);
  if(Debug >= 2) fprintf(fStd,"\nGenAbbrevName: '%s'",GenAbbrevName);
  if(Debug >= 2) fprintf(fStd,"\nForbiddenForms: '%s'",ForbiddenForms);
  if(Debug >= 2) fprintf(fStd,"\nBegin page: '%d'",BeginPageNumber);
  if(Debug >= 2) fprintf(fStd,"\nEnd page: '%d'",EndPageNumber);
  strcpy(GenAbbrevFileName,GenAbbrevName);

// (* read abbreviations *)

  if(strlen(DictionaryRoot) != 0 && strlen(GenAbbrevName) != 0)
  {
    if(GenAbbrevName[0] == '/')
    {
    } else if(GenAbbrevName[0] == '.')
    {
    } else if(GenAbbrevName[1] == ':')
    {
    } else
    {
      strcpy(GenAbbrevFileName,DictionaryRoot);
      if(Debug >= 2) fprintf(fStd,"\nAbbreviation is now '%s'",GenAbbrevFileName);
      if(GenAbbrevFileName[strlen(GenAbbrevFileName)-1] != '/')
        strcat(GenAbbrevFileName,"/");
      strcat(GenAbbrevFileName,Language);
      if(GenAbbrevFileName[strlen(GenAbbrevFileName)-1] != '/')
        strcat(GenAbbrevFileName,"/");
      strcat(GenAbbrevFileName,GenAbbrevName);
      if(Debug >= 2)
        fprintf(fStd,"\nAbbreviation file name set to '%s'",GenAbbrevFileName);
    }
  }
  /* Reading local and general abbreviations */
  /* Reading local first */
  NumAbbrevs = 0;
  if (LocAbbrevName[0] != '\0')
  {
    /* This version of PrepareText put diagnostic in ErrorMsgText */  	
    if (!PrepareText(LocAbbrevName,&fText)) error(ErrorMsgText);
    if (Debug >= 1) fprintf(fStd,"\nOpened %s",LocAbbrevName);

    NumAbbrevs = ReadDictionary(&fText, AbbrevTable, MAXABBREVS, "abbreviations");
    if (Debug >= 1) fprintf(fStd,"\nFound %d abbreviations",NumAbbrevs);
    CloseText(&fText);
  }
  if (GenAbbrevName[0] != '\0')
  {
    if (!PrepareText(GenAbbrevFileName,&fText)) error(ErrorMsgText);
    if (Debug >= 1) fprintf(fStd,"\nOpened %s",GenAbbrevFileName);

    NumAbbrevs = ReadDictionary(&fText, AbbrevTable, MAXABBREVS, "abbreviations");
    CloseText(&fText);
    if (Debug >= 1) fprintf(fStd,"\nFound %d abbreviations",NumAbbrevs);
  }

  /* sorting abbreviation table */
  NumAbbrevs = SortDictionary(AbbrevTable, MAXABBREVS, "abbreviations",AbbrevOrd);
  if (Debug >= 1) fprintf(fStd,"\nFound %d abbreviations after sorting",NumAbbrevs);

  /* Reading examined text */

  NumInWord = 0;
  if (InputFileName[0] == '\0') error("No input file given!");

  if (!PrepareText(InputFileName,&fText)) error(ErrorMsgText);
  if(Debug >= 1) fprintf(fStd,"\nOpened %s",InputFileName);

  NumInWord = ReadText(&fText, InWordTable, MAXWORDS, "input text",
                       InWordFirst, InWordLast, InWordRefs);
  if (Debug >= 1) fprintf(fStd,"\nFound %d words",NumInWord);
  CloseText(&fText);

  /* sorting words extracted from input text */
  NumInWord = SortDictionary(InWordTable, MAXWORDS, "input text",InWordOrd);
  removed = CleanAbbrevs();
  if (Debug >= 1) fprintf(fStd,"\n %d known abbreviations removed",removed);
  removed = CleanPeriods();
  if (Debug >= 1) fprintf(fStd,"\n %d ending periods removed",removed);
  NumInWord = SortDictionary(InWordTable, MAXWORDS, "input text",InWordOrd);
  removed = CleanAbbrevs();
  if (Debug >= 1) fprintf(fStd,"\n %d known abbreviations removed",removed);
  removed = CleanUppers();
  if (Debug >= 1) fprintf(fStd,"\n %d upper cases moved to lower",removed);
  NumInWord = SortDictionary(InWordTable, MAXWORDS, "input text",InWordOrd);
  if (Debug >= 1) fprintf(fStd,"\nLeft  %d words",NumInWord);
  removed = CleanDuplicates();
  if (Debug >= 1) fprintf(fStd,"\n %d duplicated removed",removed);
  NumInWord = SortDictionary(InWordTable, MAXWORDS, "input text",InWordOrd);
  if (Debug >= 1) fprintf(fStd,"\nLeft  %d words",NumInWord);
  if (Debug >= 2) WordList();

  /* Now we compare the remaining words to the dictionaries */

  strcpy(DictionaryFileRoot,DictionaryRoot);
  if(Debug >= 2) fprintf(fStd,"\nDictionary is now '%s'",DictionaryFileRoot);
  if(DictionaryFileRoot[strlen(DictionaryFileRoot)-1] != '/')
    strcat(DictionaryFileRoot,"/");
  strcat(DictionaryFileRoot,Language);
  if(DictionaryFileRoot[strlen(DictionaryFileRoot)-1] != '/')
    strcat(DictionaryFileRoot,"/");

  if(Debug >= 2) fprintf(fStd,"\nDictionary root is now '%s'",DictionaryFileRoot);

  for (LetterIndex=0; LetterIndex<strlen(DictionaryPrefixes); LetterIndex++)
  {
    DictPrefix = DictionaryPrefixes[LetterIndex];
    sprintf(DictionaryFileName,"%s%c%s",DictionaryFileRoot,DictPrefix,GenericName);
    if(Debug >= 2) fprintf(fStd,"\nDictionary is now '%s'",DictionaryFileName);

    if (!PrepareText(DictionaryFileName,&fDict)) error(ErrorMsgText);
    if(Debug >= 1) fprintf(fStd,"\nOpened %s",DictionaryFileName);
    removed = CleanFromDictionary(&fDict, DictPrefix);
    if (Debug >= 2) fprintf(fStd,"\nExited from CleanFromDictionary, removed=%d\n",removed);
    CloseText(&fDict);
    if (Debug >= 2) fprintf(fStd,"Closed fDict\n");
    if (sort_between_dict)
      NumInWord = SortDictionary(InWordTable, MAXWORDS, "input text",InWordOrd);
    
    if (Debug >= 1) fprintf(fStd,"\nRemoved %d items using %s",removed,DictionaryFileName);
  }
  NumInWord = SortDictionary(InWordTable, MAXWORDS, "input text",InWordOrd);
  if (Debug >= 2) WordList();

  if (OutputFileName[0] == '\0')
  { fprintf(stderr,"\nNo output file given, output to stdout.");
  } else if(!PrepareOut(OutputFileName,&fOut))
  { fprintf(stderr,"\nOutput file given cannot be opened, output to stdout.");
    fOut = stdout;
  }
  WordOut(&fOut);
   
    
}  

/****************************************************************************/
int CleanFromDictionary(FILE **f, char DictPrefix)
/****************************************************************************
purpose: reads dictionary whose name is given, and removes in InWordTable
the word found in dictionary.
output: the number of items removed
 ****************************************************************************/
{
  int cNext = '\0';
  char TheWord[WRKMAX] = {'\0'};
  int WordPosition = 0;
  int k; int m; int removed = 0; int compare;
  int m_start = 0;
  int m_stop = NumInWord;

  int zz = 0;
  BOOL already = FALSE;

  if (Debug >= 3) fprintf(fStd,"\nEntering CleanFromDictionary\n");

  /* define limits of index m according to the selectied dictionary:
     a-z limits to initial letters a-z, 0 has not limits
  */

  if(DictPrefix >= 'a' && DictPrefix <= 'z')
  {
    while (InWordTable[InWordOrd[m_start]][0] < DictPrefix && m_start < NumInWord)
    m_start++;

    while (InWordTable[InWordOrd[m_stop-1]][0] > DictPrefix && m_stop > 1)
    m_stop--;
    if (m_stop < NumInWord) m_stop++;  /* better test one more, than omit */
  }

  {  	
    while(cNext != EOF)
    {
      TheWord[0] = '\0'; WordPosition = '\0'; cNext = '\0';
    /* skip leading blanks and line ends */
      while((cNext != EOF) && (cNext <= ' '))
      {
        cNext = getc(*f);
      }
      if (Debug >= 4) fprintf(fStd,"\ncNext = %2.2x (skipped leading)\n",cNext);
      while((cNext != EOF) && (cNext > ' '))
      {
        TheWord[WordPosition] = cNext;
        WordPosition++;
        if(WordPosition > WRKMAX) error("Word too long!");
        TheWord[WordPosition] = '\0';
        cNext = getc(*f);
        if (Debug >= 4) fprintf(fStd,"\ncNext = %2.2x '%c' TheWord='%s'\n",cNext,cNext,TheWord);
      }
      if (Debug >= 3) fprintf(fStd,"\ncNext = %2.2x (end of word '%s') NumInWord=%d\n",cNext,TheWord,NumInWord);

      m = m_start;
      if(m < 0) m = 0;

      
      while(m < m_stop)
      {
        if (Debug >= 4) fprintf(fStd,
         "\nAvant strcmp: m=%d InWordOrd[m]=%d InWordTable[InWordOrd[m]]='%s' TheWord='%s'",
             m,InWordOrd[m],InWordTable[InWordOrd[m]],TheWord);
        while((compare=strcmp(InWordTable[InWordOrd[m]],TheWord)) < 0)
        {
          if (Debug >= 4) fprintf(fStd,
         "\n m=%d InWordOrd[m]=%d InWordTable[InWordOrd[m]]='%s' TheWord='%s'",
             m,InWordOrd[m],InWordTable[InWordOrd[m]],TheWord);
          m++;
        }
  
        if(compare == 0)
        { Last_m = m+1;
          if(Debug >= 2) fprintf(fStd,"\nRemoving '%s'",InWordTable[InWordOrd[m]]);
          RemoveWord(InWordOrd[m]); removed++;
        } 
        m++;
      }
      if(Debug >= 3) fprintf(fStd,"\nReached m_stop = %d\n",m_stop);
    }
    if(Debug >= 2) fprintf(fStd,"\nReached cNext = EOF %d\n",cNext);
  }
  if (Debug >= 3) fprintf(fStd,"\nCleanFrom -> return Removed = %d\n", removed);
  return removed;
}	
  
/****************************************************************************/
void WordOut(FILE **f)
/****************************************************************************
purpose: writes the list of remaining words to *f, one line per word:
         rank, word (blank-padded to 20 columns), number of references and
         first-last page.
 ****************************************************************************/
{
  char Padded[WRKMAX];
  size_t Length;
  int Rank;
  int Slot;

  for (Rank = 0; Rank < NumInWord; Rank++)
  {
    Slot = InWordOrd[Rank];

    /* copy the word, then fill with blanks up to column 20 if it is shorter */
    strcpy(Padded, InWordTable[Slot]);
    Length = strlen(Padded);
    if (Length < 20)
    {
      memset(Padded + Length, ' ', 20 - Length);
      Padded[20] = '\0';
    }

    fprintf(*f,"\n%3d %s : %d (pages[s] %d-%d)",Rank,
      Padded, InWordRefs[Slot], InWordFirst[Slot], InWordLast[Slot]);
  }
}

/****************************************************************************/
void WordList()
/****************************************************************************
purpose: debug listing on fStd of the remaining words, in sorted order:
         rank, first page, last page, (number of references), word.
 ****************************************************************************/
{
  int Rank = 0;
  int Slot;

  while (Rank < NumInWord)
  {
    Slot = InWordOrd[Rank];
    fprintf(fStd,"\n%4d %4d %4d (%4d) %s",Rank,
      InWordFirst[Slot], InWordLast[Slot], InWordRefs[Slot], InWordTable[Slot]);
    Rank++;
  }
}

/****************************************************************************/
void Usage()
/****************************************************************************
purpose: displays a summary of usage options through OutputError().
globals: overwrites ErrorMsgText with the first line of the summary.
 ****************************************************************************/
{
    /* fixed part of the help text, one OutputError() call per entry */
    static char *HelpLines[] =
    {
      "Options:",
      "  -a<abbrev>    : dictionary of abbreviations",
      "  -A<loc-abbr>  : local dictionary of abbreviations",
      "  -d<directory> : root directory for dictionaries",
      "  -f<name>      : generic name for dictionaries",
      "  -l<language>  : root subdirectory for dictionaries",
      "  -o<output>    : output file name",
      "  -x<forbid>    : dictionary of forbidden forms",
      "  -v            : verbose",
      "  -D<n>         : <n> = debug level (1=verbose)",
      "  -b<n>         : ignore words of page number < n",
      "  -e<p>         : ignore words of page number > p",
      "Note: abbreviations are case sensitive",
      "\r"
    };
    size_t Line;

    sprintf(ErrorMsgText,"Usage: %s [options] <input file>",
      progname);
    OutputError(ErrorMsgText);

    for (Line = 0; Line < sizeof HelpLines / sizeof HelpLines[0]; Line++)
    {
      OutputError(HelpLines[Line]);
    }
}

/****************************************************************************/
int SortDictionary(char **DicTable, int DicSize, char *DicName, int *Ordre)
/****************************************************************************
params: DicTable: the stored table, DicSize: max number of items
output: the total number of items in that table
 ****************************************************************************/
{
  int NumWords = 0; int k = 0; int quick,l,nb_words_quick,x,xp,m;
  BOOL desordre; int compare; int NewNumWords = 0;

  while(DicTable[NumWords] != NULL)
  { NumWords++; if (NumWords >= DicSize) error("Sorting error 1");
  }

/* starting sorting the words */
  if (Debug >= 2) fprintf(fStd,"\nSorting %d words in %s", NumWords, DicName);

  desordre = TRUE;
  for (k=0; k<NumWords; k++) Ordre[k] = k; /* initialize sorting */
  quick = NumWords/2;
  if (quick == 0) quick = 1;
  if (Debug >= 2) fprintf(fStd,"\n quick = %d", quick);

  while (desordre || (quick > 1))
  {
     desordre = FALSE;
     for (l = 0; l<NumWords-quick; l++)
     {
       x = Ordre[l]; xp = Ordre[l+quick];
       compare = strcmp(DicTable[x],DicTable[xp]);
       if (compare > 0)
       { desordre = TRUE;
         if (Debug >= 4) fprintf(fStd,"\n invert %d(%d) <-> %d(%d)",
          l+quick,Ordre[l+quick],l,Ordre[l]);
         Ordre[l+quick] = x; Ordre[l] = xp;
       }
     }
     if (!desordre)
     {
       if (Debug > 1) fprintf(fStd,"\n  quick= %d", quick);
       if (quick>1)
       {
         quick = quick/2; if (quick == 0) quick = 1;
         desordre = TRUE;
       }
     }
  }
//   ; {end while desordre}
  for (k=0; k<NumWords; k++)
  {
    if (strcmp(EndAlphabet,DicTable[Ordre[k]]) != 0) NewNumWords = k+1;  	
    if (Debug >= 3) fprintf(fStd,"\n%4d %s",k,DicTable[Ordre[k]]);
  }
  if (Debug >= 2) fprintf(fStd,"\nNew NumWords = %d", NewNumWords);
  return NewNumWords;
}	
/****************************************************************************/
int CleanDuplicates()
/****************************************************************************
purpose: removes duplicates in InWordTable, merging the page references.
output: the number of items removed
 ****************************************************************************/
{ char *InWord;  char *NextWord; 
  int m=0; int len; int removed=0; int k=0;

  if (Debug >= 2) fprintf(fStd,"\nRemoving duplicated words after cleanings");
  for (m=0; m<NumInWord-1; m++)
  {
    InWord = InWordTable[InWordOrd[m]];
    NextWord = InWordTable[InWordOrd[m+1]];
    if (Debug >= 3) fprintf(fStd,"\n m=%d, InWord='%s', Nextword='%s'",m,InWord,NextWord);
    if (InWord == NULL)
    {
    } else if(InWord[0] == '\0')
    {
    } else if(strcmp(InWord,NextWord) == 0)
    {
    	if(Debug >= 2) fprintf(fStd,"\n '%s' removed",NextWord);

      if(InWordFirst[InWordOrd[m]] > InWordFirst[InWordOrd[m+1]])
        InWordFirst[InWordOrd[m]] = InWordFirst[InWordOrd[m+1]];
      if(InWordLast[InWordOrd[m]] < InWordLast[InWordOrd[m+1]])
        InWordLast[InWordOrd[m]] = InWordLast[InWordOrd[m+1]];
      InWordRefs[InWordOrd[m]] += InWordRefs[InWordOrd[m+1]];

      removed += RemoveWord(InWordOrd[m+1]);
    }
  }

  return removed;
}	
/****************************************************************************/
int CleanPeriods()
/****************************************************************************
purpose: removes ending periods in InWordTable, respecting upper/lower case.
         A word that becomes empty (it consisted of periods only, or was
         already empty) is removed from the table.
output: the number of periods removed
 ****************************************************************************/
{ char *InWord;
  int m; size_t len; int removed=0;

  for (m=0; m<NumInWord; m++)
  {
    InWord = InWordTable[InWordOrd[m]];
    if (InWord == NULL) continue;   /* nothing stored: nothing to examine */

    len = strlen(InWord);
    while(len > 0 && InWord[len-1] == '.')
    { len--;
      InWord[len] = '\0'; removed++;
    }
    if(len == 0) RemoveWord(InWordOrd[m]);
  }
  return removed;
}
/****************************************************************************/
int CleanUppers()
/****************************************************************************
purpose: moves all letters to lower case, in place, in the first NumInWord
         words of InWordTable taken in InWordOrd order.
         ASCII 'A'..'Z' are converted directly; characters >= 128 are handed
         to a switch whose case labels come from an included header
         (lower850.h under MSDOS, loweriso.h otherwise).
output:  the number of ASCII letters converted, plus whatever the included
         case labels add to `removed` (not visible here)
NOTE(review): TheChar is a plain char.  Where char is signed, the test
         `TheChar >= 128` can never be true and the switch is never reached -
         confirm the intended char signedness and the form of the case
         labels in the headers before changing this.
 ****************************************************************************/
{ char *InWord;
  int m=0; int len; int removed=0; int z; char TheChar;
  for (m=0; m<NumInWord; m++)
  {
    InWord = InWordTable[InWordOrd[m]];
    if (InWord == NULL)
    {
      /* no word stored in this slot */
    } else if(InWord[0] != '\0')
    { len = strlen(InWord);
      for (z=0;z<len;z++)
      {
      	TheChar = InWord[z];
      	if(TheChar >= 'A' && TheChar <= 'Z')
      	{ InWord[z] = TheChar+'a'-'A'; removed++;
      	} else if (TheChar >= 128)
      	{
      	  /* the case labels of this switch are supplied by the header */
      	  switch(TheChar)
      	  {

#ifdef MSDOS
#include "lower850.h"
#else
#include "loweriso.h"
#endif
            default: ;
      	  }
      	}
      }
    }
  }
  return removed;
}	
/****************************************************************************/
int RemoveWord(int zzz)
/****************************************************************************
purpose: removes a word InWordTable
output: the number of items removed
 ****************************************************************************/
{
  strcpy(InWordTable[zzz],EndAlphabet);
  InWordFirst[zzz] = 9999;
  InWordLast[zzz] = -9999;
  InWordRefs[zzz] = 0;
  return 1;
  
}
/****************************************************************************/
int CleanAbbrevs()
/****************************************************************************
purpose: removes abbreviations in InWordTable, respecting upper/lower case.
output: the number of items removed
 ****************************************************************************/
{
  int k=0; int m=0; /* k: index in abbrevs, m: index in words. */
  int klass=0; char *InWord = NULL; char *Abbrev = NULL;
  int NumRemoved = 0;

  k = 0; m = 0;
  while (k<NumAbbrevs && m<NumInWord)
  { /* strict comparison */
    InWord = InWordTable[InWordOrd[m]];
    Abbrev = AbbrevTable[AbbrevOrd[k]];
    klass = strcmp(InWord, Abbrev);
    if(Debug >= 3) fprintf(fStd,"\nCleanabb: m=%d k=%d InWord='%s', Abbrev='%s', klass=%d",
                                m, k, InWord, Abbrev, klass);
    if(klass < 0)
    { m++;
    } else if(klass > 0)
    { k++;
    } else
    { if (Debug >= 2) fprintf(fStd,"\n Removed '%s'",InWord);
      RemoveWord(InWordOrd[m]); NumRemoved ++; m++;
    }
  }
  return NumRemoved;
}
/****************************************************************************/
int ReadText(FILE **f, char **DicTable, int DicSize, char *DicName,
    int *FirstRef, int *LastRef, int *NumRefs)
/****************************************************************************
purpose: reads the examined text word by word (words are separated by
         blanks, control characters or line ends) and stores each distinct
         word once in DicTable, together with its page references.
         A token "page:word" sets the current page number, which stays in
         force for the following tokens; words on pages outside
         BeginPageNumber..EndPageNumber are ignored.
params:  f: input FILE, DicTable: the stored table (NULL-terminated),
         DicSize: max number of items, DicName: name used in diagnostics,
         FirstRef, LastRef: tables of first and last page references of each
         word in the text, NumRefs: the number of references of each word
output:  the total number of items in that table
notes:   the last slot of the table is kept free so that the table always
         ends with a NULL entry, as SortDictionary() requires.
 ****************************************************************************/
{
  int cNext = '\0';
  char TheWord[WRKMAX] = {'\0'};
  int WordPosition = 0;
  int ThePageNum = -9999;
  int k;
  int digit;
  char *TheTrueWord = NULL;

  int zz = 0;
  BOOL already = FALSE;

  while(cNext != EOF)
  {
    TheWord[0] = '\0'; WordPosition = 0;

    /* skip leading blanks and line ends */
    do
    {
      cNext = getc(*f);
    } while((cNext != EOF) && (cNext <= ' '));

    while((cNext != EOF) && (cNext > ' '))
    {
      /* keep one byte for the terminator */
      if(WordPosition >= WRKMAX-1)
      { snprintf(ErrorMsgText, sizeof ErrorMsgText,
                 "Word '%.80s...' too long!", TheWord);
        error(ErrorMsgText);
      }
      TheWord[WordPosition] = (char) cNext;
      WordPosition++;
      TheWord[WordPosition] = '\0';
      cNext = getc(*f);
    }
    if (Debug >= 4) fprintf(fStd,"\n '%s' cNext='%2.2x'",TheWord,cNext);

    /* Separate the possible initial page number from the rest of the word */

    TheTrueWord = strchr(TheWord,':');
    if (TheTrueWord == NULL)
    { TheTrueWord = TheWord;
    } else
    { ThePageNum = 0;  TheTrueWord++;
      /* the digits found before the colon form the page number */
      for (k = 0; TheWord[k] != ':'; k++)
      { if(TheWord[k] >= '0' && TheWord[k] <= '9')
        { digit = TheWord[k] - '0';
          if (ThePageNum > (INT_MAX - digit) / 10)
            ThePageNum = INT_MAX;            /* saturate, never overflow */
          else
            ThePageNum = 10*ThePageNum + digit;
        }
      }
      /* the sign is applied once, after all digits have been read */
      if(TheWord[0] == '-') ThePageNum = -ThePageNum;
    }

    if(ThePageNum >= BeginPageNumber && ThePageNum <= EndPageNumber)
    {
      already = (TheTrueWord[0] == '\0');  /* an empty word is never stored */

      /* The whole table is scanned even after a hit: the scan also yields
         the current number of entries in zz. */
      zz = 0;
      while(DicTable[zz] != NULL)
      {
        if(strcmp(TheTrueWord,DicTable[zz]) == 0)
        { already = TRUE;
          if (ThePageNum > LastRef[zz]) LastRef[zz] = ThePageNum;
          if (ThePageNum < FirstRef[zz]) FirstRef[zz] = ThePageNum;
          NumRefs[zz] ++;
        }
        zz++;
        if(zz >= DicSize)
        { snprintf(ErrorMsgText, sizeof ErrorMsgText,
                   "Dictionary table '%s' exceeded!", DicName);
          error(ErrorMsgText);
        }
      }
      if (!already)
      {
        if(zz + 1 >= DicSize)
        { snprintf(ErrorMsgText, sizeof ErrorMsgText,
                   "Dictionary table '%s' exceeded!", DicName);
          error(ErrorMsgText);
        }
        /* a few spare bytes: RemoveWord() later overwrites the text */
        DicTable[zz] = malloc((size_t) WordPosition + 4);
        if (DicTable[zz] == NULL) error("Out of memory!");
        strcpy(DicTable[zz],TheTrueWord);
        FirstRef[zz] = ThePageNum;
        LastRef[zz] = ThePageNum;
        NumRefs[zz] = 1;
        if (Debug >= 3) fprintf(fStd,"\n%d stored %s %s", zz, TheTrueWord, DicTable[zz]);
        zz++;
      }
    }
  }
  if (Debug >= 2)
  { for (k=0; k<zz; k++)
    { fprintf(fStd,"\n%4d %4d %4d (%4d) %s",k,
      FirstRef[k], LastRef[k], NumRefs[k], DicTable[k]);
    }
  }
  return zz;
}

/****************************************************************************/
int ReadDictionary(FILE **f, char **DicTable, int DicSize, char *DicName)
/****************************************************************************
params: f: input FILE, DicTable: the stored table, DicSize: max number of items
output: the total number of items in that table
 ****************************************************************************/
{
  int cNext = '\0';
  char TheWord[WRKMAX] = {'\0'};
  int WordPosition = 0;
  int k;

  int zz = 0;
  BOOL already = FALSE;

  while(cNext != EOF)
  {
    TheWord[0] = '\0'; WordPosition = '\0'; cNext = '\0';
  /* skip leading blanks and line ends */
    while((cNext != EOF) && (cNext <= ' '))
    {
      cNext = getc(*f);
    }

    while((cNext != EOF) && (cNext > ' '))
    {
      TheWord[WordPosition] = cNext;
      WordPosition++;
      if(WordPosition > WRKMAX) error("Word too long!");
      TheWord[WordPosition] = '\0';
      cNext = getc(*f);
    }
//    if (Debug >= 2) fprintf(fStd,"\n '%s' cNext='%2.2x'",TheWord,cNext);
    already = strlen(TheWord) == 0;
    zz = 0;
    while(DicTable[zz] != NULL)
    {
      if(strcmp(TheWord,DicTable[zz]) == 0) already = TRUE;
      zz++;
      if(zz >= DicSize)
      { sprintf(ErrorMsgText,"Dictionary table '%s' exceeded!", DicName);
        error(ErrorMsgText);
      }
    }
    if (!already)
    {
      DicTable[zz] = malloc(WordPosition+4);
      sprintf(DicTable[zz],"%s",TheWord);
      if (Debug >= 3) fprintf(fStd,"\n%d stored %s %s", zz, TheWord, DicTable[zz]);
      zz++;
    }
  }
  if (Debug >= 2)
  { for (k=0; k<zz; k++)
    { fprintf(fStd,"\n%4d %s",k,DicTable[k]);
    }
  }
  return zz;
}

/****************************************************************************/
void Message(char *text)
/****************************************************************************
purpose: writes a message, preceded by a line end, on the log stream fStd
 ****************************************************************************/
{
  FILE *Log = fStd;

  fprintf(Log,"\n%s",text);
}

/****************************************************************************/
void OutputError(char *text)
/****************************************************************************
purpose: writes an error or diagnostic message, preceded by a line end, on
         the log stream fStd; when StdToScreen is false the message is
         written on stderr first.
 ****************************************************************************/
{
  FILE *Targets[2];
  int Count = 0;
  int i;

  if (!StdToScreen) Targets[Count++] = stderr;
  Targets[Count++] = fStd;

  for (i = 0; i < Count; i++)
  {
    fprintf(Targets[i],"\n%s",text);
  }
}

/****************************************************************************/
void error(char * text)
/****************************************************************************
purpose: writes error message
globals: reads progname;
 ****************************************************************************/
{
  char errormessage[256] = "";
  sprintf(errormessage,"%s: ERROR: %s",progname,text);
  OutputError(errormessage);  	
  OutputError("Program aborted\n");
  exit(-1);
}
/****************************************************************************/
void Error(char * text)
/****************************************************************************
purpose: writes an error message without progname, then terminates the
         program with exit(-1).  The message is truncated to the size of the
         local buffer rather than overflowing it.
 ****************************************************************************/
{
  char errormessage[256] = "";
  snprintf(errormessage, sizeof errormessage, "ERROR: %s",text);
  OutputError(errormessage);
  OutputError("Program aborted\n");
  exit(-1);
}
/****************************************************************************/
void warning(char * text)
/****************************************************************************
purpose: writes a diagnostic message prefixed by "WARNING: " and returns.
         The message is truncated to the size of the local buffer rather
         than overflowing it.
 ****************************************************************************/
{
  char errormessage[256] = "";
  snprintf(errormessage, sizeof errormessage, "WARNING: %s",text);
  OutputError(errormessage);
}

/******************************************************************************/
int BackToSlash(char *Name)
/******************************************************************************
Changes all backslashes to slashes in given string, in place.
Returns the number of characters changed; a NULL string is left alone and
yields 0.  The string is walked once, up to its terminator.
 ******************************************************************************/
{
  int num_changes = 0;
  char *p;

  if (Name == NULL) return 0;

  for (p = Name; *p != '\0'; p++)
  {
    if(*p == '\\')
    { *p = '/'; num_changes++;
    }
  }
  return num_changes;
}

/******************************************************************************/
int SlashToBack(char *Name)
/******************************************************************************
Changes all slashes to backslashes in given string, in place.
Returns the number of characters changed; a NULL string is left alone and
yields 0.  The string is walked once, up to its terminator.
 ******************************************************************************/
{
  int num_changes = 0;
  char *p;

  if (Name == NULL) return 0;

  for (p = Name; *p != '\0'; p++)
  {
    if(*p == '/')
    { *p = '\\'; num_changes++;
    }
  }
  return num_changes;
}


/****************************************************************************/
BOOL PrepareText(char *filename, FILE **f)  /* opens file for reading */
/****************************************************************************
purpose: opens input file.
params: filename - name of inputfile
        f - pointer to filepointer to store file ID
 ****************************************************************************/

{ if (Debug >= 3) fprintf(fStd,"\nEntering PrepareText");
  if(filename != NULL)
  {
    if(filename[0] != '\0')
    {
      if ((*f = fopen(filename,"rb")) == NULL)	 /* open file */
      {
      	sprintf(ErrorMsgText,"Error opening file %s",filename);
        return FALSE;
      } else
      {
        return TRUE;
      }
    } else error("Fatal : opening an empty file name!");
  } else error("Fatal : opening an NULL file name!");
}


/****************************************************************************/
BOOL CloseText(FILE **f)
/****************************************************************************
purpose: closes input file.  stdin is never closed, and a stream that is
         not open (NULL) is left alone.
params: f - pointer to filepointer to invalidate
returns: always TRUE
 ****************************************************************************/
{
  if(f == NULL) return TRUE;

  if(*f != NULL && *f != stdin)
      fclose(*f);
  *f = NULL;
  return TRUE;
}

/****************************************************************************/
BOOL PrepareOut(char *filename, FILE **f)  /* creates file */
/****************************************************************************
purpose: creates output file.
params: filename - name of outputfile
        f - pointer to filepointer to store file ID
 ****************************************************************************/
/* Note: fopen yields a pointer to a FILE */
{
  if(filename != NULL)
  {
      if ((*f = fopen(filename,"wb")) == NULL)	 /* open file */
      {
      	sprintf(ErrorMsgText,"Error opening output file %s",filename);
        error(ErrorMsgText);
        exit(1);
      }
  };
  return TRUE;
 }


