/*@z36.c:Hyphenation: Declarations@*******************************************/
/*                                                                           */
/*  THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.24)                       */
/*  COPYRIGHT (C) 1991, 2000 Jeffrey H. Kingston                             */
/*                                                                           */
/*  Jeffrey H. Kingston (jeff@cs.usyd.edu.au)                                */
/*  Basser Department of Computer Science                                    */
/*  The University of Sydney 2006                                            */
/*  AUSTRALIA                                                                */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either Version 2, or (at your option)      */
/*  any later version.                                                       */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
/*                                                                           */
/*  FILE:         z36.c                                                      */
/*  MODULE:       Hyphenation                                                */
/*  EXTERNS:      Hyphenate()                                                */
/*                                                                           */
/*****************************************************************************/
#include "externs.h"
#define	NODE_MULT	4		/* what to multiply node indexes by  */
#define MAX_CHAR	256		/* max chars represented in one char */
#define TRIE_MAGIC	5361534
#define KILL_CLASS	0		/* characters preventing hyphenation */
#define PUNCT_CLASS	1		/* characters delimiting hyphenation */


/*****************************************************************************/
/*                                                                           */
/*  static tex_codes[]                                                       */
/*                                                                           */
/*  TeX hyphenation files often contain escape sequences consisting of a     */
/*  backslash and two or three characters to denote 8-bit characters.  This  */
/*  code will read and translate such sequences if they are in the following */
/*  list.                                                                    */
/*                                                                           */
/*****************************************************************************/

static char *tex_codes[] = {

	"Agrave",	"`A",	"\300",
	"Aacute",	"'A",	"\301",
	"Acircumflex",	"^A",	"\302",
	"Atilde",	"~A",	"\303",
	"Adieresis",	"\"A",	"\304",
	"agrave",	"`a",	"\340",
	"aacute",	"'a",	"\341",
	"acircumflex",	"^a",	"\342",
	"atilde",	"~a",	"\343",
	"adieresis",	"\"a",	"\344",

	"ccedilla",	"cc",	"\347",

	"Egrave",	"`E",	"\310",
	"Eacute",	"'E",	"\311",
	"Ecircumflex",	"^E",	"\312",
	"Edieresis",	"\"E",	"\313",
	"egrave",	"`e",	"\350",
	"eacute",	"'e",	"\351",
	"ecircumflex",	"^e",	"\352",
	"edieresis",	"\"e",	"\353",

	"Igrave",	"`I",	"\314",
	"Iacute",	"'I",	"\315",
	"Icircumflex",	"^I",	"\316",
	"Idieresis",	"\"I",	"\317",
	"igrave",	"`\\i",	"\354",
	"iacute",	"'\\i",	"\355",
	"icircumflex",	"^\\i",	"\356",
	"idieresis",	"\"\\i","\357",

	"Ograve",	"`O",	"\322",
	"Oacute",	"'O",	"\323",
	"Ocircumflex",	"^O",	"\324",
	"Otilde",	"~O",	"\325",
	"Odieresis",	"\"O",	"\326",
	"ograve",	"`o",	"\362",
	"oacute",	"'o",	"\363",
	"ocircumflex",	"^o",	"\364",
	"otilde",	"~o",	"\365",
	"odieresis",	"\"o",	"\366",

	"Ugrave",	"`U",	"\331",
	"Uacute",	"'U",	"\332",
	"Ucircumflex",	"^U",	"\333",
	"Udieresis",	"\"U",	"\334",
	"ugrave",	"`u",	"\371",
	"uacute",	"'u",	"\372",
	"ucircumflex",	"^u",	"\373",
	"udieresis",	"\"u",	"\374",

	"",		"",	""
};

static void DecodeEscapes(FULL_CHAR *str, FULL_CHAR *fname, int hline_num)
{ FULL_CHAR *p, *q;
  int i;
  p = q = str;
  while( *q != '\0' )
  {
    if( *q == '\\' )
    { for( i = 0;  tex_codes[i][0] != '\0';  i += 3 )
      {
	if( StringBeginsWith(q+1, AsciiToFull(tex_codes[i+1])) )
	  break;
      }
      if( tex_codes[i][0] != '\0' )
      {
	StringCopy(p, tex_codes[i+2]);
	p += StringLength(AsciiToFull(tex_codes[i+2]));
	q += StringLength(AsciiToFull(tex_codes[i+1])) + 1;
      }
      else
      {
	Error(36, 1, "in hyphenation file %s, unknown escape sequence (line %d)",
	  FATAL, no_fpos, fname, hline_num);
      }
    }
    else *p++ = *q++;
  }
  *p++ = '\0';
} /* end DecodeEscapes */


/*****************************************************************************/
/*                                                                           */
/*  static TRIE HyphTables[]                                                 */
/*                                                                           */
/*  The packed hyphenation tables, indexed by language.  An entry is NULL    */
/*  when the table for that language has not yet been read in; TriedFile     */
/*  is TRUE after we have tried to read that file, whether or not we were    */
/*  successful.                                                              */
/*                                                                           */
/*****************************************************************************/

typedef struct trie_rec
{ int		magic;			/* a magic number to make sure ok    */
  int		class_count;		/* the number of character classes   */
  unsigned char	class[MAX_CHAR];	/* the character classes             */
  short		*node_mem;		/* the node memory                   */
  int		node_lim;		/* top of node memory                */
  int		node_free;		/* first free space in node memory   */
  FULL_CHAR	*string_mem;		/* the string memory                 */
  int		string_lim;		/* top of string memory              */
  int		string_first;		/* the first (last inserted) string  */
} *TRIE;

static TRIE	HyphTables[MAX_LANGUAGE] = { NULL };
static BOOLEAN	TriedFile[MAX_LANGUAGE]  = { FALSE };


/*@::CompressValue(), UncompressValue()@**************************************/
/*                                                                           */
/*  CompressValue(p, q)                                                      */
/*                                                                           */
/*  Compress value string p, placing the result in q.                        */
/*                                                                           */
/*****************************************************************************/
#define FirstHalf(y)		( (y) >> 4 )
#define LastHalf(y)		( (y) & 15 )
#define AssignFirstHalf(x, y)	( (x) = ((y) << 4) )
#define AssignLastHalf(x, y)	( (x) |= (y) )

#define CompressValue(compressed, uncompressed)				\
/* FULL_CHAR *compressed, *uncompressed; */				\
{ register FULL_CHAR *p, *q;						\
  p = compressed;  q = uncompressed;					\
  for( ; ; )								\
  {									\
    if( *q == (FULL_CHAR) '\0' )					\
    { *p = (FULL_CHAR) '\0';						\
      break;								\
    }									\
    AssignFirstHalf(*p, *q++ - '0' + 2);				\
    if( *q == (FULL_CHAR) '\0' )					\
    { *++p = (FULL_CHAR) '\0';						\
      break;								\
    }									\
    AssignLastHalf(*p, *q++ - '0' + 2);					\
    p++;								\
  }									\
}

/*****************************************************************************/
/*                                                                           */
/*  UncompressValue(q, p)                                                    */
/*                                                                           */
/*  Uncompress value string q, placing the result in p.                      */
/*                                                                           */
/*****************************************************************************/

#define UncompressValue(compressed, uncompressed)			\
/* FULL_CHAR *compressed, *uncompressed; */				\
{ register FULL_CHAR *p, *q;						\
  p = compressed; q = uncompressed;					\
  for( ; ; )								\
  {									\
    if( FirstHalf(*p) == '\0' )  break;					\
    *q++ = FirstHalf(*p) + '0' - 2;					\
    if( LastHalf(*p) == '\0' )  break;					\
    *q++ = LastHalf(*p) + '0' - 2;					\
    p++;								\
  }									\
  *q = (FULL_CHAR) '\0';						\
}

/*@::AltCompressValue(), AltUncompressValue()@********************************/
/*                                                                           */
/*  AltCompressValue(compressed, uncompressed)                               */
/*                                                                           */
/*  Compress value string, placing the result in compressed.                 */
/*                                                                           */
/*  This is an alternative compression scheme to the one given above, which  */
/*  should give better compression.  The result is a sequence of pairs of    */
/*  the form (skip, value) saying that we are to skip so many places and     */
/*  then insert the given non-zero value.  All the other values are zero.    */
/*  Skip values are 4-bit numbers (maximum skip is 15, but we will insert    */
/*  a 15 skip with a zero value in the rare case of skipping further).       */
/*  Values are also 4-bit numbers, known to be non-zero.  So the memory      */
/*  cost is 8 bits per non-zero value.                                       */
/*                                                                           */
/*****************************************************************************/
#define CharPack(ch, a, b) (ch = ((a) << 4) | (b))
#define CharUnPack(ch, a, b) ((a) = (ch) >> 4, (b) = (ch) & 15)

#define AltCompressValue(compressed, uncompressed)			\
/* FULL_CHAR *compressed, *uncompressed; */				\
{ register FULL_CHAR *p, *q, *prev;					\
  prev = (uncompressed) - 1;  p = (compressed);				\
  for( q = (uncompressed);  *q != (FULL_CHAR) '\0';  q++ )		\
  {									\
    if( *q != (FULL_CHAR) '0' || q - prev - 1 >= 15 )			\
    {									\
      CharPack(*p++, q - prev - 1, *q - '0' + 2);			\
      prev = q;								\
    }									\
  }									\
  *p++ = (FULL_CHAR) '\0';						\
}

/*****************************************************************************/
/*                                                                           */
/*  AltUncompressValue(q, p)                                                 */
/*                                                                           */
/*  Uncompress value string q, placing the result in p.                      */
/*                                                                           */
/*****************************************************************************/

#define AltUncompressValue(compressed, uncompressed)			\
{ register FULL_CHAR *p, *q, xval; int i, skip;				\
  q = (uncompressed);							\
  for( p = (compressed);  *p != (FULL_CHAR) '\0';  p++ )		\
  { CharUnPack(*p, skip, xval);						\
    for( i = 0;  i < skip;  i++ )					\
      *q++ = (FULL_CHAR) '0';						\
    *q++ = (FULL_CHAR) (xval + '0' - 2);					\
  }									\
  *q++ = (FULL_CHAR) '\0';						\
  debug1(DHY, D, "AltUncompressValue returning %s", (uncompressed));	\
}

/* ***
static void AltUncompressValue(FULL_CHAR *compressed, FULL_CHAR *uncompressed)
{ register FULL_CHAR *p, *q, xval; int i, skip;				
  q = uncompressed;							
  for( p = compressed;  *p != (FULL_CHAR) '\0';  p++ )			
  { CharUnPack(*p, skip, xval);						
    for( i = 0;  i < skip;  i++ )					
      *q++ = (FULL_CHAR) '0';						
    *q++ = (FULL_CHAR) (xval + '0' - 2);					
  }									
  *q++ = (FULL_CHAR) '\0';						
  debug1(DHY, D, "AltUncompressValue returning %s", uncompressed);
}
*** */

/*@@**************************************************************************/
/*                                                                           */
/*  ClassConvert(in, out, T, fname, hline_num)                               */
/*                                                                           */
/*  Set out[i] to the character class of in[i] in T, for all i.              */
/*                                                                           */
/*****************************************************************************/

#define ClassConvert(in, out, T, fname, hline_num)			\
{ int i;								\
  for( i = 0;  in[i] != '\0';  i++ )					\
    if( T->class[in[i]] != 0 )  out[i] = T->class[in[i]];		\
    else								\
      Error(36, 2, "in hyphenation file %s, line %d: character (octal %o) is not in any class",	\
	FATAL, no_fpos, fname, hline_num, in[i]);				\
  out[i] = '\0';							\
} /* end ClassConvert */


/*@::findrep(), TrieRetrieve(), ShowRate()@***********************************/
/*                                                                           */
/*  findrep(i, T)     Returns one character whose class in T is i.           */
/*                                                                           */
/*****************************************************************************/
#if DEBUG_ON

static FULL_CHAR findrep(int i, TRIE T)
{ int ch;
  for( ch = 0;  ch < MAX_CHAR;  ch++ )
    if( T->class[ch] == i ) return (FULL_CHAR) ch;
  Error(36, 3, "DoTriePrint: findrep failed", INTERN, no_fpos);
  return (FULL_CHAR) ch;  /* never reached, but gcc doesn't know that */
} /* end findrep */


#if 0
/*****************************************************************************/
/*                                                                           */
/*  static FULL_CHAR *TrieRetrieve(key, T)                                   */
/*                                                                           */
/*  Retrieve the value associated with key in T, or NULL if not present.     */
/*  This procedure is not presently in use.                                  */
/*                                                                           */
/*****************************************************************************/

static FULL_CHAR *TrieRetrieve(FULL_CHAR *key, TRIE T)
{ FULL_CHAR str[MAX_BUFF];  int i, curr_node, next_node, pos;
  debug1(DHY, DD, "TrieRetrieve(%s, T)", key);
  ClassConvert(key, str, T, STR_EMPTY, 0);

  /* invariant: curr_node is an existing node of T with prefix str[0..i-1] */
  curr_node = i = 0;
  for(;;)
  {
    /* if next_node is 0, the string was never inserted */
    next_node = T->node_mem[curr_node + str[i]];
    if( next_node == 0 )  return (FULL_CHAR *) NULL;

    /* if next_node < 0 it represents an offset into the string memory */
    if( next_node < 0 )
    { pos = - next_node;
      if( str[i] != '\0' )
      {	do
	{ if( str[++i] != T->string_mem[pos++] )  return (FULL_CHAR *) NULL;
	} while( str[i] != '\0' );
      }
      return &(T->string_mem[pos]);
    }

    /* otherwise next_node is the trie node to be searched next */
    curr_node = NODE_MULT*next_node;  i++;
  }
} /* end TrieRetrieve */
#endif


/*****************************************************************************/
/*                                                                           */
/*  static ShowRate(key, start, stop, rate, fp)                              */
/*                                                                           */
/*  Debug print of key[] and rate[] on file fp.                              */
/*                                                                           */
/*****************************************************************************/

static void ShowRate(FULL_CHAR *key, int start, int stop, FULL_CHAR *rate,
FILE *fp)
{ int i;
  fprintf(fp, "key:    ");
  for( i = start;  i < stop;  i++ )  fprintf(fp, " %c", key[i]);
  fprintf(fp, "\nrate:");
  for( i = 0;  rate[i] != '\0';  i++ )  fprintf(fp, " %c", rate[i]);
  fprintf(fp, "\n");
} /* end ShowRate */


/*@::DoTriePrint(), TriePrint()@**********************************************/
/*                                                                           */
/*  static DoTriePrint(T, node, len, fp)                                     */
/*                                                                           */
/*  Print on file fp the subset of the entries of trie T stored in node and  */
/*  its descendants.  The node has prefix prefix[0..len-1].                  */
/*                                                                           */
/*****************************************************************************/

static FULL_CHAR prefix[MAX_BUFF];

static void DoTriePrint(TRIE T, int node, int len, FILE *fp)
{ int i, next_node, pos;  FULL_CHAR str[20];
  for( i = 0;  i < T->class_count;  i++ )
  {
    /* if next_node < 0, have string to print */
    next_node = T->node_mem[node + i];
    if( next_node < 0 )
    {
      prefix[len] = '\0';
      fprintf(fp, "%s", prefix);
      pos = - next_node;
      if( i != 0 )
      {
	fprintf(fp, "%c", findrep(i, T));
	while( T->string_mem[pos] != '\0' )
	{ fprintf(fp, "%c", findrep(T->string_mem[pos], T));
	  pos++;
	}
	pos++;
      }
      AltUncompressValue(&(T->string_mem[pos]), str);
      fprintf(fp, " %s\n", str);
    }

    /* else if next_node > 0 have a child node to explore */
    else if( next_node > 0 )
    { assert( i > 0, "DoTriePrint: i == 0!" );
      prefix[len] = findrep(i, T);
      prefix[len+1] = '\0';
      DoTriePrint(T, NODE_MULT*next_node, len+1, fp);
    }
  }
} /* end DoTriePrint */


/*****************************************************************************/
/*                                                                           */
/*  static TriePrint(T, fp)                                                  */
/*                                                                           */
/*  Print trie T on file fp.                                                 */
/*                                                                           */
/*****************************************************************************/

static void TriePrint(TRIE T, FILE *fp)
{ int i, ch;
  assert( T-> magic == TRIE_MAGIC, "TriePrint: magic!" );
  fprintf(fp, "Classes:");
  for( i = 1;  i < T->class_count;  i++ )
  { fprintf(fp, " ");
    for( ch = 0;  ch < MAX_CHAR;  ch++ )
      if( T->class[ch] == i )  fprintf(fp, "%c", ch);
  }
  fprintf(fp, "\n");
  fprintf(fp, "Node space: %d capacity, %d used\n", T->node_lim, T->node_free);
  fprintf(fp, "String space: %d capacity, %d used\n", T->string_lim,
	T->string_lim - T->string_first);
  prefix[0] = '\0';
  DoTriePrint(T, 0, 0, fp);
} /* end TriePrint */
#endif


/*@::NewTrie(), NewTrieString(), NewTrieNode()@*******************************/
/*                                                                           */
/*  static TRIE NewTrie(node_lim, string_lim)                                */
/*                                                                           */
/*  Initialize a new trie with this much space for nodes and strings.        */
/*                                                                           */
/*****************************************************************************/

static TRIE NewTrie(unsigned node_lim, unsigned string_lim)
{ TRIE T;  int i;
  debug2(DHY, DD, "NewTrie(%d, %d)", node_lim, string_lim);
  ifdebug(DMA, DD, DebugRegisterUsage(MEM_HYPH_PATS, 1,
    sizeof(struct trie_rec) + node_lim*sizeof(short)+string_lim*sizeof(char)));
  T = (TRIE) malloc( sizeof(struct trie_rec)
		     + node_lim*sizeof(short) + string_lim*sizeof(char));
  if( T == (TRIE) NULL )
    Error(36, 4, "run out of memory while constructing hyphenation table",
      FATAL, no_fpos);
  T->magic = TRIE_MAGIC;  T->class_count = 1;
  for( i = 0;  i < MAX_CHAR;  i++ )  T->class[i] = 0;
  T->node_mem = (short *) ( (char *) T + sizeof(struct trie_rec));
  T->node_lim = node_lim;  T->node_free = 0;
  T->string_mem = (FULL_CHAR *) &(T->node_mem[node_lim]);
  T->string_lim = T->string_first = string_lim;
  debug0(DHY, DD, "NewTrie returning.");
  return T;
} /* end NewTrie */


/*****************************************************************************/
/*                                                                           */
/*  static short NewTrieString(str, T)                                       */
/*                                                                           */
/*  Copy a new string into T, and return its offset in string_mem;           */
/*                                                                           */
/*****************************************************************************/

static short NewTrieString(FULL_CHAR *str, TRIE T)
{ short res = T->string_first - StringLength(str) - 1;
  if( res >= 0 )
  { T->string_first = res;  StringCopy(&(T->string_mem[res]), str);
  }
  return res;
} /* end NewTrieString */


/*****************************************************************************/
/*                                                                           */
/*  ststic int NewTrieNode(T)                                                */
/*                                                                           */
/*  Allocate a new empty trie node in T, and return its offset in node_mem.  */
/*                                                                           */
/*****************************************************************************/

static int NewTrieNode(TRIE T)
{ int i;  int res;
  if( T->node_free + T->class_count > T->node_lim )
    Error(36, 5, "hyphenation trie node limit exceeded", INTERN, no_fpos);
  res = T->node_free;  T->node_free += T->class_count;
  for( i = res;  i < T->node_free;  i++ )  T->node_mem[i] = 0;
  return res;
} /* end NewTrieNode */


/*@::AddClassToTrie(), TrieInsert()@******************************************/
/*                                                                           */
/*  static AddClassToTrie(str, T)                                            */
/*                                                                           */
/*  Add a new character class, whose members are the characters of str, to   */
/*  trie T.  This cannot occur after the first insertion.                    */
/*                                                                           */
/*****************************************************************************/

static void AddClassToTrie(FULL_CHAR *str, TRIE T)
{ int i;
  assert( T->string_first == T->string_lim, "AddClassToTrie: after insertion");
  for( i = 0;  str[i] != '\0';  i++ )
    if( T->class[str[i]] == 0 ) T->class[str[i]] = T->class_count;
    else Error(36, 6, "hyphenation class of %c may not be changed",
      INTERN, no_fpos, str[i]);
  T->class_count++;
} /* end AddClassToTrie */


/*****************************************************************************/
/*                                                                           */
/*  static BOOLEAN TrieInsert(key, value, T, fname, hline_num)               */
/*                                                                           */
/*  Insert a new key and value into trie T (originating in file fname on     */
/*  line hline_num).                                                         */
/*                                                                           */
/*****************************************************************************/

static BOOLEAN TrieInsert(FULL_CHAR *key, FULL_CHAR *value, TRIE T,
FULL_CHAR *fname, int hline_num)
{ FULL_CHAR str[MAX_BUFF], compressed_value[MAX_BUFF];
  int i, curr_node, next_node, pos, ch;  short strpos;
  debug2(DHY, DD, "TrieInsert(%s, %s, T)", key, value);

  /* if first insertion, add one node after making sure class_count is even */
  if( T->node_free == 0 )
  { T->class_count = NODE_MULT * ceiling(T->class_count, NODE_MULT);
    ch = NewTrieNode(T);
  }

  AltCompressValue(compressed_value, value);

  /* invariant: curr_node is an existing node of T with prefix str[0..i-1] */
  ClassConvert(key, str, T, fname, hline_num);
  curr_node = i = 0;
  for(;;)
  {
    /* if str is ended, add compressed_value only to string memory */
    if( str[i] == '\0' )
    { if( T->node_mem[curr_node] != 0 )
	Error(36, 7, "hyphenation string %s already inserted",
	  INTERN, no_fpos, key);
      else
      {
	strpos = NewTrieString(compressed_value, T);
	if( strpos < 0 )
	{ debug0(DHY, DD, "TrieInsert returning FALSE (trie full)");
	  return FALSE;
	}
	T->node_mem[curr_node] = - strpos;
      }
      debug0(DHY, DD, "TrieInsert returning TRUE (empty suffix).");
      return TRUE;
    }

    /* if next position is unoccupied, store remainder of str and value */
    next_node = T->node_mem[curr_node + str[i]];
    if( next_node == 0 )
    { ch = NewTrieString(compressed_value, T);
      if( ch < 0 )
      { debug0(DHY, DD, "TrieInsert returning FALSE (trie full)");
	return FALSE;
      }
      strpos = NewTrieString(&str[i+1], T);
      if( strpos < 0 )
      { debug0(DHY, DD, "TrieInsert returning FALSE (trie full)");
	return FALSE;
      }
      T->node_mem[curr_node + str[i]] = - strpos;
      debug0(DHY, DD, "TrieInsert returning (non-empty suffix).");
      return TRUE;
    }

    /* if next position is occupied by a non-empty string, move that */
    /* string down one level and replace it by a trie node           */
    if( next_node < 0 )
    { pos = - next_node;
      ch = T->string_mem[pos];
      if( T->string_first == pos )  T->string_first++;
      T->node_mem[curr_node + str[i]] = next_node = NewTrieNode(T)/NODE_MULT;
      T->node_mem[NODE_MULT*next_node + ch] = -(pos+1);
    }

    /* now next is the offset of the next node to be searched */
    curr_node = NODE_MULT*next_node;  i++;
  }
} /* end TrieInsert */


/*@::BeGetChar(), BePutChar(), BeGetShort(), BePutShort(), etc.@**************/
/*                                                                           */
/*  BeGetChar(fp, pv)                                                        */
/*  BePutChar(fp, v)                                                         */
/*  BeGetShort(fp, pv)                                                       */
/*  BePutShort(fp, v)                                                        */
/*  BeGetInt(fp, pv)                                                         */
/*  BePutInt(fp, v)                                                          */
/*                                                                           */
/*  Get char, short, or int pv from file fp, and put char, short, or int     */
/*  onto file fp.  These routines are designed so that the file can be       */
/*  written or read safely by big-endian and little-endian architectures;    */
/*  this is accomplished by reading and writing one byte at a time to and    */
/*  from a big-endian format file.  All return 0 on success, -1 on failure.  */
/*  Thanks to David W. Sanderson for this code.                              */
/*                                                                           */
/*****************************************************************************/

#define BeGetChar(fp, pv)  ( (c = getc(fp)) == EOF ? -1 : (*pv = c & 0xFF, 0) )
#define BePutChar(fp, v)   ( putc( (char) (v & 0xFF), fp), 0 )

#define BeGetShort(fp, pv)						\
(  (c = getc(fp)) == EOF ? -1 :						\
   (  *pv = (c & 0xFF) << 8,						\
      (c = getc(fp)) == EOF ? -1 : (*pv |= c & 0xFF, 0)			\
   )									\
)

#define BePutShort(fp, v)						\
( putc((v >> 8) & 0xFF, fp), putc(v & 0xFF, fp), 0 )

static int BeGetInt(FILE *fp, int *pv)
{ int c;
  if ((c = getc(fp)) == EOF) return -1;
  *pv = (c & 0xFF) << 24;
  if ((c = getc(fp)) == EOF) return -1;
  *pv |= (c & 0xFF) << 16;
  if ((c = getc(fp)) == EOF) return -1;
  *pv |= (c & 0xFF) << 8;
  if ((c = getc(fp)) == EOF) return -1;
  *pv |= c & 0xFF;
  return 0;
}

static int BePutInt(FILE *fp, int v)
{
  putc((v >> 24) & 0xFF, fp);
  putc((v >> 16) & 0xFF, fp);
  putc((v >> 8) & 0xFF, fp);
  putc(v & 0xFF, fp);
  return 0;
}


/*@::CompressTrie(), TrieRead(), AccumulateRating()@**************************/
/*                                                                           */
/*  static CompressTrie(T)                                                   */
/*                                                                           */
/*  Compress trie T and return its length in characters.                     */
/*                                                                           */
/*****************************************************************************/

static void CompressTrie(TRIE T)
{ FULL_CHAR *p, *q;  int len, i;
  debug0(DHY, DD, "CompressTrie(T), T =");
  debug2(DHY, DD, "Node space: %d capacity, %d used\n",
    T->node_lim, T->node_free);
  debug2(DHY, DD, "String space: %d capacity, %d used\n",
    T->string_lim, T->string_lim - T->string_first);
  ifdebug(DHY, DD, TriePrint(T, stderr));
  T->node_lim = T->node_free;
  for( i = 0;  i < T->node_lim;  i++ )
    if( T->node_mem[i] < 0 )
      T->node_mem[i] = - ( -T->node_mem[i] - T->string_first);
  p = (FULL_CHAR *) &(T->node_mem[T->node_free]);
  q = &(T->string_mem[T->string_first]);
  len = T->string_lim - T->string_first;
  for( i = 0;  i < len;  i++ )  *p++ = *q++;
  T->string_mem = (FULL_CHAR *) &(T->node_mem[T->node_lim]);
  T->string_first = 0;
  T->string_lim = len;
  len = sizeof(struct trie_rec) + T->node_lim * sizeof(short)
				+ T->string_lim * sizeof(FULL_CHAR);
  debug1(DHY, DD, "CompressTrie returning; len = %d, T =", len);
  ifdebug(DHY, DD, TriePrint(T, stderr));
} /* end CompressTrie */


/*****************************************************************************/
/*                                                                           */
/*  static TRIE TrieRead(lnum, success)                                      */
/*                                                                           */
/*  Read in a packed trie if possible, otherwise pack an unpacked one.       */
/*  The trie is to be for language lnum.                                     */
/*                                                                           */
/*  Boolean success is set to true if no errors were encountered.  If the    */
/*  file read was a placeholder, success will be true but still a null       */
/*  TRIE will be returned.                                                   */
/*                                                                           */
/*****************************************************************************/
#define START_STATE		0
#define CLASSES_STATE		1
#define EXCEPTIONS_STATE	2
#define LENGTH_LIMIT_STATE	3
#define PATTERNS_STATE		4

static TRIE TrieRead(LANGUAGE_NUM lnum, BOOLEAN *success)
{ TRIE T;  FILE_NUM unpacked_fnum, packed_fnum;  OBJECT fname;
  FILE *unpacked_fp, *packed_fp;  unsigned len;
  int prev, i, j, c, state, hline_num, length_limit;
#if DEBUG_ON
  int icount = 0;
#endif
  debug2(DHY, DD, "TrieRead(%d %s)", lnum,
    lnum == 0 ? STR_NONE : LanguageString(lnum));

  /* get hyphenation file name from language module */
  fname = LanguageHyph(lnum);
  assert( fname == nilobj || is_word(type(fname)), "TrieRead: fname!" );
  if( fname == nilobj )
  { *success = FALSE;
    return (TRIE) NULL;
  }

  /* define and open packed file */
  debug0(DFS, DD, "  calling DefineFile from TrieRead (1)");
  packed_fnum = DefineFile(string(fname), HYPH_PACKED_SUFFIX,
    &fpos(fname), HYPH_PACKED_FILE, HYPH_PATH);
  if( InitializeAll )
  {
    /* initializing so want to reconstruct packed files */
    /* thanks to Ian Jackson <ian@chiark.greenend.org.uk> for this */
    packed_fp = NULL;
  }
  else
  {
    /* not initializing so use existing packed files if possible */
    packed_fp = OpenFile(packed_fnum, FALSE, FALSE);
  }

  if( packed_fp == NULL )
  {
    /* no packed file, so define and open unpacked one instead */
    FULL_CHAR str[MAX_BUFF], key[MAX_BUFF], value[MAX_BUFF],
		  buff[MAX_BUFF+10];
    int bpos, bcount;
    debug0(DFS, DD, "  calling DefineFile from TrieRead (2)");
    unpacked_fnum = DefineFile(string(fname), HYPH_SUFFIX,
      &fpos(fname), HYPH_FILE, HYPH_PATH);
    unpacked_fp = OpenFile(unpacked_fnum, FALSE, FALSE);
    if( unpacked_fp == NULL )
    { Error(36, 8, "cannot open hyphenation file %s",
	WARN, no_fpos, FileName(unpacked_fnum));
      *success = FALSE;
      return (TRIE) NULL;
    }

    /* check that first line contains magic header or stub */
    if( StringFGets(str, MAX_BUFF, unpacked_fp) == NULL ||
        ( !StringEqual(str, AsciiToFull("Lout hyphenation information\n")) &&
	  !StringEqual(str, AsciiToFull("Lout hyphenation placeholder\n")) )
      )
      Error(36, 9, "header line of hyphenation file %s missing",
	FATAL, no_fpos, FileName(unpacked_fnum));

    /* if file is just a placeholder, exit silently with success */
    if( !StringEqual(str, AsciiToFull("Lout hyphenation information\n")) )
    { *success = TRUE;
      return (TRIE) NULL;
    }

    /* read the classes, exceptions, and patterns from the unpacked file */
    T = NewTrie( (unsigned) 120000,  (unsigned) 32767);
    state = START_STATE;
    hline_num = 1;
    length_limit = 0;
    while( StringFGets(buff, MAX_BUFF, unpacked_fp) != NULL )
    {
      hline_num++;  bpos = 0;
      while( sscanf( (char *) &buff[bpos], "%s%n", str, &bcount) == 1 &&
	str[0] != '%' )
      {
	bpos += bcount;
        DecodeEscapes(str, string(fname), hline_num);

        switch( state )
        {
	  case START_STATE:

	    if( !StringEqual(str, AsciiToFull("Classes:")) )
	      Error(36, 10, "Classes heading of hyphenation file %s missing",
	        FATAL, no_fpos, FileName(unpacked_fnum));
	    state = CLASSES_STATE;
	    break;


	  case CLASSES_STATE:

	    if( StringEqual(str, AsciiToFull("Exceptions:")) )
	    { state = EXCEPTIONS_STATE;
	    }
	    else if( StringEqual(str, AsciiToFull("Patterns:")) )
	    { state = PATTERNS_STATE;
	    }
	    else if( StringEqual(str, AsciiToFull("LengthLimit:")) )
	    { state = LENGTH_LIMIT_STATE;
	    }
	    else
	    { debug1(DHY, DD, "adding class %s", str);
	      AddClassToTrie(str, T);
	    }
	    break;


	  case EXCEPTIONS_STATE:

	    if( StringEqual(str, AsciiToFull("Patterns:")) )
	    { state = PATTERNS_STATE;
	    }
	    else if( StringEqual(str, AsciiToFull("LengthLimit:")) )
	    { state = LENGTH_LIMIT_STATE;
	    }
	    else
	    { prev = CH_EIGHT; j = 0;
	      key[j] = '.', value[j++] = prev, prev = CH_EIGHT;
	      for( i = 0;  str[i] != '\0';  i++ )
	      { if( str[i] == CH_HYPHEN )  prev = CH_NINE;
	        else key[j] = str[i], value[j++] = prev, prev = CH_EIGHT;
	      }
	      key[j] = '.', value[j++] = prev, prev = CH_EIGHT;
	      key[j] = '\0';  value[j] = prev;  value[j+1] = '\0';
	      if( !TrieInsert(key, value, T, string(fname), hline_num) )
	      {
	        Error(36, 11, "hyphenation file %s%s is too large (at line %d)",
		  WARN, &fpos(fname), string(fname), HYPH_SUFFIX, hline_num);
	        *success = FALSE;
	        return (TRIE) NULL;
	      }
	    }
	    break;


	  case LENGTH_LIMIT_STATE:

	    if( StringEqual(str, AsciiToFull("Patterns:")) )
	    { state = PATTERNS_STATE;
	    }
	    else if( sscanf( (char *) str, "%d", &length_limit) != 1 )
	    {
	      Error(36, 20, "bad LengthLimit in hyphenation file %s%s (line %d)",
		WARN, &fpos(fname), string(fname), HYPH_SUFFIX, hline_num);
	      *success = FALSE;
	      return (TRIE) NULL;
	    }
	    break;


	  case PATTERNS_STATE:

	    prev = CH_ZERO; j = 0;
	    for( i = 0;  str[i] != '\0';  i++ )
	    { if( decimaldigit(str[i]) )  prev = str[i];
	      else key[j] = str[i], value[j++] = prev, prev = CH_ZERO;
	    }
	    key[j] = '\0';  value[j] = prev;  value[j+1] = '\0';
	    if( length_limit == 0 || j <= length_limit )
	    {
	      debug3(DHY, DD, "TrieInsert(%s, %s, T) [%d]", key, value, ++icount);
	      if( !TrieInsert(key, value, T, string(fname), hline_num) )
	      {
	        Error(36, 12, "hyphenation file %s%s is too large (at line %d)",
		    WARN, &fpos(fname), string(fname), HYPH_SUFFIX, hline_num);
	        *success = FALSE;
	        return (TRIE) NULL;
	      }
	    }
	    break;


	  default:

	    assert(FALSE, "TrieRead: state");
	    break;

        } /* end switch */
      } /* end while */
    } /* end while */

    if( state != PATTERNS_STATE )
      Error(36, 13, "format error in hyphenation file %s",
	FATAL, no_fpos, FileName(unpacked_fnum));
    fclose(unpacked_fp);
    CompressTrie(T);

    /* Avoid writing and re-reading a cache file in benchmarking mode */
#if 0
    /* write the compressed trie out to the packed file */
    /* cannot use FileName(packed_fnum) because path won't be right */
    StringCopy(buff, FileName(unpacked_fnum));
    StringCopy(&buff[StringLength(buff) - StringLength(HYPH_SUFFIX)],
      HYPH_PACKED_SUFFIX);
    packed_fp = StringFOpen(buff, WRITE_BINARY);
    if( packed_fp == NULL )
      Error(36, 14, "cannot write to hyphenation file %s", FATAL,no_fpos,buff);
    BePutInt(packed_fp, T->magic);
    BePutInt(packed_fp, T->class_count);
    for( i = 0; i < MAX_CHAR; i++ )  BePutChar(packed_fp, T->class[i]);
    /* BePutInt(packed_fp, 0); */ /* placeholder for node_mem now omitted */
    BePutInt(packed_fp, T->node_lim);
    BePutInt(packed_fp, T->node_free);
    /* BePutInt(packed_fp, 0); */  /* placeholder for string_mem now omitted */
    BePutInt(packed_fp, T->string_lim);
    BePutInt(packed_fp, T->string_first);
    for( i=0; i < T->node_free; i++ )  BePutShort(packed_fp, T->node_mem[i]);
    for( i=0; i < T->string_lim; i++)  BePutChar(packed_fp, T->string_mem[i]);
    fclose(packed_fp);

    /* free T */
    ifdebug(DMA, DD, DebugRegisterUsage(MEM_HYPH_PATS, 1,
      sizeof(struct trie_rec) + 120000*sizeof(short)+32767*sizeof(char)));
    free(T);

    /* now try again to open packed_fnum, the file just written */
    packed_fp = OpenFile(packed_fnum, FALSE, FALSE);
    if( packed_fp == NULL )
      Error(36, 15, "cannot open hyphenation file %s",
	FATAL, no_fpos, FileName(packed_fnum));
#endif
  } /* end if( packed_fp == NULL ) */

#if 0
  /* now packed hyphenation file is open, read it in */
  fseek(packed_fp, 0L, SEEK_END);
  len = (unsigned) ftell(packed_fp); rewind(packed_fp);
  ifdebug(DMA, DD, DebugRegisterUsage(MEM_HYPH_PATS, 1, len));
  /* the 2*sizeof(void*) is for the sizes of node_mem and string_mem */
  T = (TRIE) malloc(len + 2*sizeof(void*));
  if( T == (TRIE) NULL )
    Error(36, 16, "run out of memory while reading hyphenation table",
      FATAL, no_fpos);
  if( BeGetInt(packed_fp, &T->magic) != 0 )
    Error(36, 17, "error on read from packed hyphenation file %s",
      FATAL, no_fpos, FileName(packed_fnum));
  if( T->magic != TRIE_MAGIC )
    Error(36, 18, "bad magic number in hyphenation file %s",
      FATAL, no_fpos, FileName(packed_fnum));
  BeGetInt(packed_fp, &T->class_count);
  for( i = 0; i < MAX_CHAR; i++ )  BeGetChar(packed_fp, &T->class[i]);
  /* BeGetInt(packed_fp, &i); */  /* placeholder for node_mem now omitted */
  BeGetInt(packed_fp, &T->node_lim);
  BeGetInt(packed_fp, &T->node_free);
  /* BeGetInt(packed_fp, &i); */ /* placeholder for string_mem now omitted */
  BeGetInt(packed_fp, &T->string_lim);
  BeGetInt(packed_fp, &T->string_first);
  T->node_mem = (short *) ( (char *) T + sizeof(struct trie_rec) );
  T->string_mem = (FULL_CHAR *) &(T->node_mem[T->node_lim]);
  for( i = 0; i < T->node_free; i++ )  BeGetShort(packed_fp, &T->node_mem[i]);
  for( i = 0; i < T->string_lim; i++ ) BeGetChar(packed_fp, &T->string_mem[i]);
  fclose(packed_fp);
#endif

  /* debug and exit */
  debug0(DHY, DD, "TrieRead returning, T =");
  *success = TRUE;
  ifdebug(DHY, DD, TriePrint(T, stderr));
  return T;
} /* end TrieRead */


/*****************************************************************************/
/*                                                                           */
/*  AccumulateRating(x, y)                                                   */
/*                                                                           */
/*  Accumulate the hyphenation rating string x into y.                       */
/*                                                                           */
/*****************************************************************************/

#define AccumulateRating(x, y)						\
{ FULL_CHAR *p = x, *q = y;						\
  while( *p )								\
  { if( *p > *q )  *q = *p;						\
    p++, q++;								\
  }									\
} /* end AccumulateRating */


/*@::ReadHyphTable()@*********************************************************/
/*                                                                           */
/*  BOOLEAN ReadHyphTable(lnum)                                              */
/*                                                                           */
/*  Read hyphenation table for language lnum.                                */
/*                                                                           */
/*****************************************************************************/

BOOLEAN ReadHyphTable(LANGUAGE_NUM lnum)
{ BOOLEAN res;
  debug1(DHY, DD, "ReadHyphTable(%d)", lnum);
  assert(lnum > 0, "ReadHyphTable: lnum <= 0!");
  assert(HyphTables[lnum]==(TRIE) NULL && !TriedFile[lnum], "ReadHyphTable!");
  HyphTables[lnum] = TrieRead(lnum, &res);
  TriedFile[lnum] = TRUE;
  debug2(DHY, DD, "ReadHyphTable(%d) returning %s", lnum, bool(res));
  return res;
} /* end ReadHyphTable */


/*@::Hyphenate@***************************************************************/
/*                                                                           */
/*  OBJECT Hyphenate(x)                                                      */
/*                                                                           */
/*  Hyphenate ACAT object x, returning the hyphenated result.                */
/*                                                                           */
/*****************************************************************************/

OBJECT Hyphenate(OBJECT x)
{ OBJECT link, y, z, next_link;  TRIE T;  LANGUAGE_NUM lnum;
  FULL_CHAR str[MAX_WORD+2], rate[MAX_WORD+3], val[MAX_WORD+3],
    *class, *key, *ss, *s, *p, *rem, *lig, *a, *b;
  int start, stop, i, curr_node, next_node, pos;
  BOOLEAN hyphenated, success;
  assert( type(x) == ACAT, "Hyphenate: type(x) != ACAT!" );
  debug1(DHY, D, "Hyphenate(%s)", EchoObject(x));

  /* for each word y of x, try to hyphenate it */
  for( link = Down(x);  link != x;  link = NextDown(link) )
  { Child(y, link);
    if( !is_word(type(y)) || string(y)[0] == '\0' || !word_hyph(y) )
    {
      if( type(y) == GAP_OBJ && mode(gap(y)) == HYPH_MODE )
	nobreak(gap(y)) = FALSE;
      continue;
    }
    debug1(DHY, DD, "Hyphenate() examining %s", EchoObject(y));

    /* determine T, the trie to use */
    lnum = word_language(y);
    if( lnum == 0 )
      Error(36, 19, "no current language for word %s",
	FATAL, &fpos(y), string(y));
    T = HyphTables[lnum];

    /* if no trie is present, try to get it from a file */
    if( T == (TRIE) NULL )
    { if( !TriedFile[lnum] )
      { T = HyphTables[lnum] = TrieRead(lnum, &success);
        TriedFile[lnum] = TRUE;
      }
      if( T == (TRIE) NULL )
      { debug1(DHY, DD, "Hyphenate continuing (no trie for %s)", string(y));
        continue;
      }
    }

    /* start := index of first letter of y, stop := index following last */
    key = string(y);  class = T->class;
    for( start = 0;  class[key[start]] == PUNCT_CLASS;  start++ );
    for( stop = start;  class[key[stop]] > PUNCT_CLASS;  stop++ );

    /* if a - ended the run, hyphenate there only */
    if( key[stop] == CH_HYPHEN )
    {
      /* actually, don't hyphenate if the hyphen is last in the word [thanks Uwe] */
      if( key[stop+1] == '\0' )
	continue;

      next_link = NextDown(link);
      z = MakeWord(WORD, &key[stop+1], &fpos(y));
      word_font(z) = word_font(y);
      word_colour(z) = word_colour(y);
      word_outline(z) = word_outline(y);
      word_language(z) = word_language(y);
      word_hyph(z) = word_hyph(y);
      underline(z) = underline(y);
      debug1(DHY, DD, "Hyphenate (hyph case) making fragment %s", string(z));
      FontWordSize(z);
      Link(NextDown(link), z);
      New(z, GAP_OBJ);
      hspace(z) = vspace(z) = 0;
      SetGap(gap(z), FALSE, FALSE, TRUE, FIXED_UNIT, HYPH_MODE, 0);
      underline(z) = underline(y);
      Link(NextDown(link), z);
      Link(z, MakeWord(WORD, STR_GAP_ZERO_HYPH, &fpos(y)));
      key[stop + 1] = '\0';
      FontWordSize(y);
      /* *** link = PrevDown(next_link); */
      link = NextDown(link);
      continue;
    }

    /* do not hyphenate if less than 5 letters, or a kill char is nearby */
    if( stop - start < 5 )  continue;
    if( key[stop] != '\0' && class[key[stop]] == KILL_CLASS )  continue;

    /* let str[] be the converted substring, let rate[] be all CH_ZERO */
    str[0] = PUNCT_CLASS;  rate[0] = CH_ZERO;
    for( i = 0;  i < stop - start;  i++ )
    { str[i+1] = class[key[start + i]];
      rate[i+1] = CH_ZERO;
    }
    str[i+1] = PUNCT_CLASS;  rate[i+1] = CH_ZERO;
    str[i+2] = '\0';  rate[i+2] = CH_ZERO;
    rate[i+3] = '\0';
    ifdebug(DHY, DD, ShowRate(key, start, stop, rate, stderr));

    /* for each suffix of str[], accumulate patterns matching its prefixes */
    ss = str;
    do
    {
      ifdebug(DHY, DD,
	fprintf(stderr, "trying suffix \"");
	for( p = ss; *p != 0;  p++ )  fprintf(stderr, "%c", findrep(*p, T));
	fprintf(stderr, "\"\n");
      );
	
      /* accumulate all prefixes of ss */
      curr_node = 0;  s = ss;
      for(;;)
      {
	/* if curr_node has empty string, that is one prefix */
	pos = T->node_mem[curr_node];
	if( pos < 0 )
	{ AltUncompressValue(&(T->string_mem[- pos]), val);
	  AccumulateRating(val, rate+(ss-str));
	  debug1(DHY, DD, " found %s", val);
	}

	/* if ss is finished, no other prefixes are possible */
	if( *s == '\0' )  break;

	/* determine next_node and break if empty */
	next_node = T->node_mem[curr_node + *s];
	if( next_node == 0 )  break;

	/* if next_node is a string, check whether it is a prefix of ss */
	if( next_node < 0 )
	{ rem = &(T->string_mem[-next_node]);
	  do
	  { if( *rem == '\0' )
	    { AltUncompressValue(rem+1, val);
	      AccumulateRating(val, rate+(ss-str));
	      debug1(DHY, DD, " found %s", val);
	      break;
	    }
	  } while( *++s == *rem++ );
	  break;
	}

	/* otherwise go on to the next trie node */
	curr_node = NODE_MULT*next_node;  s++;
      }
    } while( *(++ss + 1) != PUNCT_CLASS );
    ifdebug(DHY, DD, ShowRate(key, start, stop, rate, stderr));

    /* set rate[i] to CH_ZERO whenever key[start+i-1] lies within a ligature */
    lig = finfo[word_font(y)].lig_table;
    for( p = key, i = 2;  *p != '\0';  p++, i++ )
    { if( lig[*p] > 1 )
      { a = &lig[ lig[*p] + MAX_CHARS ];
        while( *a++ == *p )
        { b = p+1;
          while( *a == *b && *(a+1) != '\0' && *b != '\0' )  a++, b++;
          if( *(a+1) == '\0' )
          { rate[i] = CH_ZERO;
            break;
          }
          else
          { while( *++a );
            a++;
          }
        }
      }
    }
    ifdebug(DHY, DD, ShowRate(key, start, stop, rate, stderr));

    /* now rate[] has accumulated ratings; use it to perform hyphenations */
    hyphenated = FALSE;
    next_link = NextDown(link);
    for( i = stop - start - 1;  i >= 3;  i-- )
    {
      /* hyphenate at i if rate[i] is odd */
      if( is_odd(rate[i]) )
      {	z = MakeWord(WORD, &key[start+i-1], &fpos(y));
	word_font(z) = word_font(y);
	word_colour(z) = word_colour(y);
	word_outline(z) = word_outline(y);
	word_language(z) = word_language(y);
	word_hyph(z) = word_hyph(y);
	underline(z) = underline(y);
        debug1(DHY, D, "Hyphenate making fragment %s", string(z));
	FontWordSize(z);
	Link(NextDown(link), z);
	New(z, GAP_OBJ);
	hspace(z) = vspace(z) = 0;
	SetGap(gap(z), FALSE, FALSE, TRUE, FIXED_UNIT, HYPH_MODE, 0);
	underline(z) = underline(y);
	Link(NextDown(link), z);
	Link(z, MakeWord(WORD, STR_GAP_ZERO_HYPH, &fpos(y)));
	key[start + i - 1] = '\0';
	hyphenated = TRUE;
      }
    }
    if( hyphenated )
    { FontWordSize(y);
      link = PrevDown(next_link);
    }

  } /* end for each word */

  debug3(DHY, D, "Hyphenate returning %s,%s %s",
    EchoLength(back(x, COLM)), EchoLength(fwd(x, COLM)), EchoObject(x));
  return x;
} /* end Hyphenate */
