| /*@z36.c:Hyphenation: Declarations@*******************************************/ |
| /* */ |
| /* THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.24) */ |
| /* COPYRIGHT (C) 1991, 2000 Jeffrey H. Kingston */ |
| /* */ |
| /* Jeffrey H. Kingston (jeff@cs.usyd.edu.au) */ |
| /* Basser Department of Computer Science */ |
| /* The University of Sydney 2006 */ |
| /* AUSTRALIA */ |
| /* */ |
| /* This program is free software; you can redistribute it and/or modify */ |
| /* it under the terms of the GNU General Public License as published by */ |
| /* the Free Software Foundation; either Version 2, or (at your option) */ |
| /* any later version. */ |
| /* */ |
| /* This program is distributed in the hope that it will be useful, */ |
| /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ |
| /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ |
| /* GNU General Public License for more details. */ |
| /* */ |
| /* You should have received a copy of the GNU General Public License */ |
| /* along with this program; if not, write to the Free Software */ |
| /* Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA */ |
| /* */ |
| /* FILE: z36.c */ |
| /* MODULE: Hyphenation */ |
| /* EXTERNS: Hyphenate() */ |
| /* */ |
| /*****************************************************************************/ |
| #include "externs.h" |
| #define NODE_MULT 4 /* what to multiply node indexes by */ |
| #define MAX_CHAR 256 /* max chars represented in one char */ |
| #define TRIE_MAGIC 5361534 |
| #define KILL_CLASS 0 /* characters preventing hyphenation */ |
| #define PUNCT_CLASS 1 /* characters delimiting hyphenation */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* static tex_codes[] */ |
| /* */ |
| /* TeX hyphenation files often contain escape sequences consisting of a */ |
| /* backslash and two or three characters to denote 8-bit characters. This */ |
| /* code will read and translate such sequences if they are in the following */ |
| /* list. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static char *tex_codes[] = { |
| |
| "Agrave", "`A", "\300", |
| "Aacute", "'A", "\301", |
| "Acircumflex", "^A", "\302", |
| "Atilde", "~A", "\303", |
| "Adieresis", "\"A", "\304", |
| "agrave", "`a", "\340", |
| "aacute", "'a", "\341", |
| "acircumflex", "^a", "\342", |
| "atilde", "~a", "\343", |
| "adieresis", "\"a", "\344", |
| |
| "ccedilla", "cc", "\347", |
| |
| "Egrave", "`E", "\310", |
| "Eacute", "'E", "\311", |
| "Ecircumflex", "^E", "\312", |
| "Edieresis", "\"E", "\313", |
| "egrave", "`e", "\350", |
| "eacute", "'e", "\351", |
| "ecircumflex", "^e", "\352", |
| "edieresis", "\"e", "\353", |
| |
| "Igrave", "`I", "\314", |
| "Iacute", "'I", "\315", |
| "Icircumflex", "^I", "\316", |
| "Idieresis", "\"I", "\317", |
| "igrave", "`\\i", "\354", |
| "iacute", "'\\i", "\355", |
| "icircumflex", "^\\i", "\356", |
| "idieresis", "\"\\i","\357", |
| |
| "Ograve", "`O", "\322", |
| "Oacute", "'O", "\323", |
| "Ocircumflex", "^O", "\324", |
| "Otilde", "~O", "\325", |
| "Odieresis", "\"O", "\326", |
| "ograve", "`o", "\362", |
| "oacute", "'o", "\363", |
| "ocircumflex", "^o", "\364", |
| "otilde", "~o", "\365", |
| "odieresis", "\"o", "\366", |
| |
| "Ugrave", "`U", "\331", |
| "Uacute", "'U", "\332", |
| "Ucircumflex", "^U", "\333", |
| "Udieresis", "\"U", "\334", |
| "ugrave", "`u", "\371", |
| "uacute", "'u", "\372", |
| "ucircumflex", "^u", "\373", |
| "udieresis", "\"u", "\374", |
| |
| "", "", "" |
| }; |
| |
| static void DecodeEscapes(FULL_CHAR *str, FULL_CHAR *fname, int hline_num) |
| { FULL_CHAR *p, *q; |
| int i; |
| p = q = str; |
| while( *q != '\0' ) |
| { |
| if( *q == '\\' ) |
| { for( i = 0; tex_codes[i][0] != '\0'; i += 3 ) |
| { |
| if( StringBeginsWith(q+1, AsciiToFull(tex_codes[i+1])) ) |
| break; |
| } |
| if( tex_codes[i][0] != '\0' ) |
| { |
| StringCopy(p, tex_codes[i+2]); |
| p += StringLength(AsciiToFull(tex_codes[i+2])); |
| q += StringLength(AsciiToFull(tex_codes[i+1])) + 1; |
| } |
| else |
| { |
| Error(36, 1, "in hyphenation file %s, unknown escape sequence (line %d)", |
| FATAL, no_fpos, fname, hline_num); |
| } |
| } |
| else *p++ = *q++; |
| } |
| *p++ = '\0'; |
| } /* end DecodeEscapes */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* static TRIE HyphTables[] */ |
| /* */ |
| /* The packed hyphenation tables, indexed by language. An entry is NULL */ |
| /* when the table for that language has not yet been read in; TriedFile */ |
| /* is TRUE after we have tried to read that file, whether or not we were */ |
| /* successful. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| typedef struct trie_rec |
| { int magic; /* a magic number to make sure ok */ |
| int class_count; /* the number of character classes */ |
| unsigned char class[MAX_CHAR]; /* the character classes */ |
| short *node_mem; /* the node memory */ |
| int node_lim; /* top of node memory */ |
| int node_free; /* first free space in node memory */ |
| FULL_CHAR *string_mem; /* the string memory */ |
| int string_lim; /* top of string memory */ |
| int string_first; /* the first (last inserted) string */ |
| } *TRIE; |
| |
| static TRIE HyphTables[MAX_LANGUAGE] = { NULL }; |
| static BOOLEAN TriedFile[MAX_LANGUAGE] = { FALSE }; |
| |
| |
| /*@::CompressValue(), UncompressValue()@**************************************/ |
| /* */ |
| /* CompressValue(p, q) */ |
| /* */ |
| /* Compress value string p, placing the result in q. */ |
| /* */ |
| /*****************************************************************************/ |
| #define FirstHalf(y) ( (y) >> 4 ) |
| #define LastHalf(y) ( (y) & 15 ) |
| #define AssignFirstHalf(x, y) ( (x) = ((y) << 4) ) |
| #define AssignLastHalf(x, y) ( (x) |= (y) ) |
| |
| #define CompressValue(compressed, uncompressed) \ |
| /* FULL_CHAR *compressed, *uncompressed; */ \ |
| { register FULL_CHAR *p, *q; \ |
| p = compressed; q = uncompressed; \ |
| for( ; ; ) \ |
| { \ |
| if( *q == (FULL_CHAR) '\0' ) \ |
| { *p = (FULL_CHAR) '\0'; \ |
| break; \ |
| } \ |
| AssignFirstHalf(*p, *q++ - '0' + 2); \ |
| if( *q == (FULL_CHAR) '\0' ) \ |
| { *++p = (FULL_CHAR) '\0'; \ |
| break; \ |
| } \ |
| AssignLastHalf(*p, *q++ - '0' + 2); \ |
| p++; \ |
| } \ |
| } |
| |
| /*****************************************************************************/ |
| /* */ |
| /* UncompressValue(q, p) */ |
| /* */ |
| /* Uncompress value string q, placing the result in p. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| #define UncompressValue(compressed, uncompressed) \ |
| /* FULL_CHAR *compressed, *uncompressed; */ \ |
| { register FULL_CHAR *p, *q; \ |
| p = compressed; q = uncompressed; \ |
| for( ; ; ) \ |
| { \ |
| if( FirstHalf(*p) == '\0' ) break; \ |
| *q++ = FirstHalf(*p) + '0' - 2; \ |
| if( LastHalf(*p) == '\0' ) break; \ |
| *q++ = LastHalf(*p) + '0' - 2; \ |
| p++; \ |
| } \ |
| *q = (FULL_CHAR) '\0'; \ |
| } |
| |
| /*@::AltCompressValue(), AltUncompressValue()@********************************/ |
| /* */ |
| /* AltCompressValue(compressed, uncompressed) */ |
| /* */ |
| /* Compress value string, placing the result in compressed. */ |
| /* */ |
| /* This is an alternative compression scheme to the one given above, which */ |
| /* should give better compression. The result is a sequence of pairs of */ |
| /* the form (skip, value) saying that we are to skip so many places and */ |
| /* then insert the given non-zero value. All the other values are zero. */ |
| /* Skip values are 4-bit numbers (maximum skip is 15, but we will insert */ |
| /* a 15 skip with a zero value in the rare case of skipping further). */ |
| /* Values are also 4-bit numbers, known to be non-zero. So the memory */ |
| /* cost is 8 bits per non-zero value. */ |
| /* */ |
| /*****************************************************************************/ |
| #define CharPack(ch, a, b) (ch = ((a) << 4) | (b)) |
| #define CharUnPack(ch, a, b) ((a) = (ch) >> 4, (b) = (ch) & 15) |
| |
| #define AltCompressValue(compressed, uncompressed) \ |
| /* FULL_CHAR *compressed, *uncompressed; */ \ |
| { register FULL_CHAR *p, *q, *prev; \ |
| prev = (uncompressed) - 1; p = (compressed); \ |
| for( q = (uncompressed); *q != (FULL_CHAR) '\0'; q++ ) \ |
| { \ |
| if( *q != (FULL_CHAR) '0' || q - prev - 1 >= 15 ) \ |
| { \ |
| CharPack(*p++, q - prev - 1, *q - '0' + 2); \ |
| prev = q; \ |
| } \ |
| } \ |
| *p++ = (FULL_CHAR) '\0'; \ |
| } |
| |
| /*****************************************************************************/ |
| /* */ |
| /* AltUncompressValue(q, p) */ |
| /* */ |
| /* Uncompress value string q, placing the result in p. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| #define AltUncompressValue(compressed, uncompressed) \ |
| { register FULL_CHAR *p, *q, xval; int i, skip; \ |
| q = (uncompressed); \ |
| for( p = (compressed); *p != (FULL_CHAR) '\0'; p++ ) \ |
| { CharUnPack(*p, skip, xval); \ |
| for( i = 0; i < skip; i++ ) \ |
| *q++ = (FULL_CHAR) '0'; \ |
| *q++ = (FULL_CHAR) (xval + '0' - 2); \ |
| } \ |
| *q++ = (FULL_CHAR) '\0'; \ |
| debug1(DHY, D, "AltUncompressValue returning %s", (uncompressed)); \ |
| } |
| |
| /* *** |
| static void AltUncompressValue(FULL_CHAR *compressed, FULL_CHAR *uncompressed) |
| { register FULL_CHAR *p, *q, xval; int i, skip; |
| q = uncompressed; |
| for( p = compressed; *p != (FULL_CHAR) '\0'; p++ ) |
| { CharUnPack(*p, skip, xval); |
| for( i = 0; i < skip; i++ ) |
| *q++ = (FULL_CHAR) '0'; |
| *q++ = (FULL_CHAR) (xval + '0' - 2); |
| } |
| *q++ = (FULL_CHAR) '\0'; |
| debug1(DHY, D, "AltUncompressValue returning %s", uncompressed); |
| } |
| *** */ |
| |
| /*@@**************************************************************************/ |
| /* */ |
| /* ClassConvert(in, out, T, fname, hline_num) */ |
| /* */ |
| /* Set out[i] to the character class of in[i] in T, for all i. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| #define ClassConvert(in, out, T, fname, hline_num) \ |
| { int i; \ |
| for( i = 0; in[i] != '\0'; i++ ) \ |
| if( T->class[in[i]] != 0 ) out[i] = T->class[in[i]]; \ |
| else \ |
| Error(36, 2, "in hyphenation file %s, line %d: character (octal %o) is not in any class", \ |
| FATAL, no_fpos, fname, hline_num, in[i]); \ |
| out[i] = '\0'; \ |
| } /* end ClassConvert */ |
| |
| |
| /*@::findrep(), TrieRetrieve(), ShowRate()@***********************************/ |
| /* */ |
| /* findrep(i, T) Returns one character whose class in T is i. */ |
| /* */ |
| /*****************************************************************************/ |
| #if DEBUG_ON |
| |
| static FULL_CHAR findrep(int i, TRIE T) |
| { int ch; |
| for( ch = 0; ch < MAX_CHAR; ch++ ) |
| if( T->class[ch] == i ) return (FULL_CHAR) ch; |
| Error(36, 3, "DoTriePrint: findrep failed", INTERN, no_fpos); |
| return (FULL_CHAR) ch; /* never reached, but gcc doesn't know that */ |
| } /* end findrep */ |
| |
| |
| #if 0 |
| /*****************************************************************************/ |
| /* */ |
| /* static FULL_CHAR *TrieRetrieve(key, T) */ |
| /* */ |
| /* Retrieve the value associated with key in T, or NULL if not present. */ |
| /* This procedure is not presently in use. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static FULL_CHAR *TrieRetrieve(FULL_CHAR *key, TRIE T) |
| { FULL_CHAR str[MAX_BUFF]; int i, curr_node, next_node, pos; |
| debug1(DHY, DD, "TrieRetrieve(%s, T)", key); |
| ClassConvert(key, str, T, STR_EMPTY, 0); |
| |
| /* invariant: curr_node is an existing node of T with prefix str[0..i-1] */ |
| curr_node = i = 0; |
| for(;;) |
| { |
| /* if next_node is 0, the string was never inserted */ |
| next_node = T->node_mem[curr_node + str[i]]; |
| if( next_node == 0 ) return (FULL_CHAR *) NULL; |
| |
| /* if next_node < 0 it represents an offset into the string memory */ |
| if( next_node < 0 ) |
| { pos = - next_node; |
| if( str[i] != '\0' ) |
| { do |
| { if( str[++i] != T->string_mem[pos++] ) return (FULL_CHAR *) NULL; |
| } while( str[i] != '\0' ); |
| } |
| return &(T->string_mem[pos]); |
| } |
| |
| /* otherwise next_node is the trie node to be searched next */ |
| curr_node = NODE_MULT*next_node; i++; |
| } |
| } /* end TrieRetrieve */ |
| #endif |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* static ShowRate(key, start, stop, rate, fp) */ |
| /* */ |
| /* Debug print of key[] and rate[] on file fp. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static void ShowRate(FULL_CHAR *key, int start, int stop, FULL_CHAR *rate, |
| FILE *fp) |
| { int i; |
| fprintf(fp, "key: "); |
| for( i = start; i < stop; i++ ) fprintf(fp, " %c", key[i]); |
| fprintf(fp, "\nrate:"); |
| for( i = 0; rate[i] != '\0'; i++ ) fprintf(fp, " %c", rate[i]); |
| fprintf(fp, "\n"); |
| } /* end ShowRate */ |
| |
| |
| /*@::DoTriePrint(), TriePrint()@**********************************************/ |
| /* */ |
| /* static DoTriePrint(T, node, len, fp) */ |
| /* */ |
| /* Print on file fp the subset of the entries of trie T stored in node and */ |
| /* its descendants. The node has prefix prefix[0..len-1]. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static FULL_CHAR prefix[MAX_BUFF]; |
| |
| static void DoTriePrint(TRIE T, int node, int len, FILE *fp) |
| { int i, next_node, pos; FULL_CHAR str[20]; |
| for( i = 0; i < T->class_count; i++ ) |
| { |
| /* if next_node < 0, have string to print */ |
| next_node = T->node_mem[node + i]; |
| if( next_node < 0 ) |
| { |
| prefix[len] = '\0'; |
| fprintf(fp, "%s", prefix); |
| pos = - next_node; |
| if( i != 0 ) |
| { |
| fprintf(fp, "%c", findrep(i, T)); |
| while( T->string_mem[pos] != '\0' ) |
| { fprintf(fp, "%c", findrep(T->string_mem[pos], T)); |
| pos++; |
| } |
| pos++; |
| } |
| AltUncompressValue(&(T->string_mem[pos]), str); |
| fprintf(fp, " %s\n", str); |
| } |
| |
| /* else if next_node > 0 have a child node to explore */ |
| else if( next_node > 0 ) |
| { assert( i > 0, "DoTriePrint: i == 0!" ); |
| prefix[len] = findrep(i, T); |
| prefix[len+1] = '\0'; |
| DoTriePrint(T, NODE_MULT*next_node, len+1, fp); |
| } |
| } |
| } /* end DoTriePrint */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* static TriePrint(T, fp) */ |
| /* */ |
| /* Print trie T on file fp. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static void TriePrint(TRIE T, FILE *fp) |
| { int i, ch; |
| assert( T-> magic == TRIE_MAGIC, "TriePrint: magic!" ); |
| fprintf(fp, "Classes:"); |
| for( i = 1; i < T->class_count; i++ ) |
| { fprintf(fp, " "); |
| for( ch = 0; ch < MAX_CHAR; ch++ ) |
| if( T->class[ch] == i ) fprintf(fp, "%c", ch); |
| } |
| fprintf(fp, "\n"); |
| fprintf(fp, "Node space: %d capacity, %d used\n", T->node_lim, T->node_free); |
| fprintf(fp, "String space: %d capacity, %d used\n", T->string_lim, |
| T->string_lim - T->string_first); |
| prefix[0] = '\0'; |
| DoTriePrint(T, 0, 0, fp); |
| } /* end TriePrint */ |
| #endif |
| |
| |
| /*@::NewTrie(), NewTrieString(), NewTrieNode()@*******************************/ |
| /* */ |
| /* static TRIE NewTrie(node_lim, string_lim) */ |
| /* */ |
| /* Initialize a new trie with this much space for nodes and strings. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static TRIE NewTrie(unsigned node_lim, unsigned string_lim) |
| { TRIE T; int i; |
| debug2(DHY, DD, "NewTrie(%d, %d)", node_lim, string_lim); |
| ifdebug(DMA, DD, DebugRegisterUsage(MEM_HYPH_PATS, 1, |
| sizeof(struct trie_rec) + node_lim*sizeof(short)+string_lim*sizeof(char))); |
| T = (TRIE) malloc( sizeof(struct trie_rec) |
| + node_lim*sizeof(short) + string_lim*sizeof(char)); |
| if( T == (TRIE) NULL ) |
| Error(36, 4, "run out of memory while constructing hyphenation table", |
| FATAL, no_fpos); |
| T->magic = TRIE_MAGIC; T->class_count = 1; |
| for( i = 0; i < MAX_CHAR; i++ ) T->class[i] = 0; |
| T->node_mem = (short *) ( (char *) T + sizeof(struct trie_rec)); |
| T->node_lim = node_lim; T->node_free = 0; |
| T->string_mem = (FULL_CHAR *) &(T->node_mem[node_lim]); |
| T->string_lim = T->string_first = string_lim; |
| debug0(DHY, DD, "NewTrie returning."); |
| return T; |
| } /* end NewTrie */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* static short NewTrieString(str, T) */ |
| /* */ |
| /* Copy a new string into T, and return its offset in string_mem; */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static short NewTrieString(FULL_CHAR *str, TRIE T) |
| { short res = T->string_first - StringLength(str) - 1; |
| if( res >= 0 ) |
| { T->string_first = res; StringCopy(&(T->string_mem[res]), str); |
| } |
| return res; |
| } /* end NewTrieString */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* ststic int NewTrieNode(T) */ |
| /* */ |
| /* Allocate a new empty trie node in T, and return its offset in node_mem. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static int NewTrieNode(TRIE T) |
| { int i; int res; |
| if( T->node_free + T->class_count > T->node_lim ) |
| Error(36, 5, "hyphenation trie node limit exceeded", INTERN, no_fpos); |
| res = T->node_free; T->node_free += T->class_count; |
| for( i = res; i < T->node_free; i++ ) T->node_mem[i] = 0; |
| return res; |
| } /* end NewTrieNode */ |
| |
| |
| /*@::AddClassToTrie(), TrieInsert()@******************************************/ |
| /* */ |
| /* static AddClassToTrie(str, T) */ |
| /* */ |
| /* Add a new character class, whose members are the characters of str, to */ |
| /* trie T. This cannot occur after the first insertion. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static void AddClassToTrie(FULL_CHAR *str, TRIE T) |
| { int i; |
| assert( T->string_first == T->string_lim, "AddClassToTrie: after insertion"); |
| for( i = 0; str[i] != '\0'; i++ ) |
| if( T->class[str[i]] == 0 ) T->class[str[i]] = T->class_count; |
| else Error(36, 6, "hyphenation class of %c may not be changed", |
| INTERN, no_fpos, str[i]); |
| T->class_count++; |
| } /* end AddClassToTrie */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* static BOOLEAN TrieInsert(key, value, T, fname, hline_num) */ |
| /* */ |
| /* Insert a new key and value into trie T (originating in file fname on */ |
| /* line hline_num). */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static BOOLEAN TrieInsert(FULL_CHAR *key, FULL_CHAR *value, TRIE T, |
| FULL_CHAR *fname, int hline_num) |
| { FULL_CHAR str[MAX_BUFF], compressed_value[MAX_BUFF]; |
| int i, curr_node, next_node, pos, ch; short strpos; |
| debug2(DHY, DD, "TrieInsert(%s, %s, T)", key, value); |
| |
| /* if first insertion, add one node after making sure class_count is even */ |
| if( T->node_free == 0 ) |
| { T->class_count = NODE_MULT * ceiling(T->class_count, NODE_MULT); |
| ch = NewTrieNode(T); |
| } |
| |
| AltCompressValue(compressed_value, value); |
| |
| /* invariant: curr_node is an existing node of T with prefix str[0..i-1] */ |
| ClassConvert(key, str, T, fname, hline_num); |
| curr_node = i = 0; |
| for(;;) |
| { |
| /* if str is ended, add compressed_value only to string memory */ |
| if( str[i] == '\0' ) |
| { if( T->node_mem[curr_node] != 0 ) |
| Error(36, 7, "hyphenation string %s already inserted", |
| INTERN, no_fpos, key); |
| else |
| { |
| strpos = NewTrieString(compressed_value, T); |
| if( strpos < 0 ) |
| { debug0(DHY, DD, "TrieInsert returning FALSE (trie full)"); |
| return FALSE; |
| } |
| T->node_mem[curr_node] = - strpos; |
| } |
| debug0(DHY, DD, "TrieInsert returning TRUE (empty suffix)."); |
| return TRUE; |
| } |
| |
| /* if next position is unoccupied, store remainder of str and value */ |
| next_node = T->node_mem[curr_node + str[i]]; |
| if( next_node == 0 ) |
| { ch = NewTrieString(compressed_value, T); |
| if( ch < 0 ) |
| { debug0(DHY, DD, "TrieInsert returning FALSE (trie full)"); |
| return FALSE; |
| } |
| strpos = NewTrieString(&str[i+1], T); |
| if( strpos < 0 ) |
| { debug0(DHY, DD, "TrieInsert returning FALSE (trie full)"); |
| return FALSE; |
| } |
| T->node_mem[curr_node + str[i]] = - strpos; |
| debug0(DHY, DD, "TrieInsert returning (non-empty suffix)."); |
| return TRUE; |
| } |
| |
| /* if next position is occupied by a non-empty string, move that */ |
| /* string down one level and replace it by a trie node */ |
| if( next_node < 0 ) |
| { pos = - next_node; |
| ch = T->string_mem[pos]; |
| if( T->string_first == pos ) T->string_first++; |
| T->node_mem[curr_node + str[i]] = next_node = NewTrieNode(T)/NODE_MULT; |
| T->node_mem[NODE_MULT*next_node + ch] = -(pos+1); |
| } |
| |
| /* now next is the offset of the next node to be searched */ |
| curr_node = NODE_MULT*next_node; i++; |
| } |
| } /* end TrieInsert */ |
| |
| |
| /*@::BeGetChar(), BePutChar(), BeGetShort(), BePutShort(), etc.@**************/ |
| /* */ |
| /* BeGetChar(fp, pv) */ |
| /* BePutChar(fp, v) */ |
| /* BeGetShort(fp, pv) */ |
| /* BePutShort(fp, v) */ |
| /* BeGetInt(fp, pv) */ |
| /* BePutInt(fp, v) */ |
| /* */ |
| /* Get char, short, or int pv from file fp, and put char, short, or int */ |
| /* onto file fp. These routines are designed so that the file can be */ |
| /* written or read safely by big-endian and little-endian architectures; */ |
| /* this is accomplished by reading and writing one byte at a time to and */ |
| /* from a big-endian format file. All return 0 on success, -1 on failure. */ |
| /* Thanks to David W. Sanderson for this code. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| #define BeGetChar(fp, pv) ( (c = getc(fp)) == EOF ? -1 : (*pv = c & 0xFF, 0) ) |
| #define BePutChar(fp, v) ( putc( (char) (v & 0xFF), fp), 0 ) |
| |
| #define BeGetShort(fp, pv) \ |
| ( (c = getc(fp)) == EOF ? -1 : \ |
| ( *pv = (c & 0xFF) << 8, \ |
| (c = getc(fp)) == EOF ? -1 : (*pv |= c & 0xFF, 0) \ |
| ) \ |
| ) |
| |
| #define BePutShort(fp, v) \ |
| ( putc((v >> 8) & 0xFF, fp), putc(v & 0xFF, fp), 0 ) |
| |
| static int BeGetInt(FILE *fp, int *pv) |
| { int c; |
| if ((c = getc(fp)) == EOF) return -1; |
| *pv = (c & 0xFF) << 24; |
| if ((c = getc(fp)) == EOF) return -1; |
| *pv |= (c & 0xFF) << 16; |
| if ((c = getc(fp)) == EOF) return -1; |
| *pv |= (c & 0xFF) << 8; |
| if ((c = getc(fp)) == EOF) return -1; |
| *pv |= c & 0xFF; |
| return 0; |
| } |
| |
| static int BePutInt(FILE *fp, int v) |
| { |
| putc((v >> 24) & 0xFF, fp); |
| putc((v >> 16) & 0xFF, fp); |
| putc((v >> 8) & 0xFF, fp); |
| putc(v & 0xFF, fp); |
| return 0; |
| } |
| |
| |
| /*@::CompressTrie(), TrieRead(), AccumulateRating()@**************************/ |
| /* */ |
| /* static CompressTrie(T) */ |
| /* */ |
| /* Compress trie T and return its length in characters. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| static void CompressTrie(TRIE T) |
| { FULL_CHAR *p, *q; int len, i; |
| debug0(DHY, DD, "CompressTrie(T), T ="); |
| debug2(DHY, DD, "Node space: %d capacity, %d used\n", |
| T->node_lim, T->node_free); |
| debug2(DHY, DD, "String space: %d capacity, %d used\n", |
| T->string_lim, T->string_lim - T->string_first); |
| ifdebug(DHY, DD, TriePrint(T, stderr)); |
| T->node_lim = T->node_free; |
| for( i = 0; i < T->node_lim; i++ ) |
| if( T->node_mem[i] < 0 ) |
| T->node_mem[i] = - ( -T->node_mem[i] - T->string_first); |
| p = (FULL_CHAR *) &(T->node_mem[T->node_free]); |
| q = &(T->string_mem[T->string_first]); |
| len = T->string_lim - T->string_first; |
| for( i = 0; i < len; i++ ) *p++ = *q++; |
| T->string_mem = (FULL_CHAR *) &(T->node_mem[T->node_lim]); |
| T->string_first = 0; |
| T->string_lim = len; |
| len = sizeof(struct trie_rec) + T->node_lim * sizeof(short) |
| + T->string_lim * sizeof(FULL_CHAR); |
| debug1(DHY, DD, "CompressTrie returning; len = %d, T =", len); |
| ifdebug(DHY, DD, TriePrint(T, stderr)); |
| } /* end CompressTrie */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* static TRIE TrieRead(lnum, success) */ |
| /* */ |
| /* Read in a packed trie if possible, otherwise pack an unpacked one. */ |
| /* The trie is to be for language lnum. */ |
| /* */ |
| /* Boolean success is set to true if no errors were encountered. If the */ |
| /* file read was a placeholder, success will be true but still a null */ |
| /* TRIE will be returned. */ |
| /* */ |
| /*****************************************************************************/ |
| #define START_STATE 0 |
| #define CLASSES_STATE 1 |
| #define EXCEPTIONS_STATE 2 |
| #define LENGTH_LIMIT_STATE 3 |
| #define PATTERNS_STATE 4 |
| |
| static TRIE TrieRead(LANGUAGE_NUM lnum, BOOLEAN *success) |
| { TRIE T; FILE_NUM unpacked_fnum, packed_fnum; OBJECT fname; |
| FILE *unpacked_fp, *packed_fp; unsigned len; |
| int prev, i, j, c, state, hline_num, length_limit; |
| #if DEBUG_ON |
| int icount = 0; |
| #endif |
| debug2(DHY, DD, "TrieRead(%d %s)", lnum, |
| lnum == 0 ? STR_NONE : LanguageString(lnum)); |
| |
| /* get hyphenation file name from language module */ |
| fname = LanguageHyph(lnum); |
| assert( fname == nilobj || is_word(type(fname)), "TrieRead: fname!" ); |
| if( fname == nilobj ) |
| { *success = FALSE; |
| return (TRIE) NULL; |
| } |
| |
| /* define and open packed file */ |
| debug0(DFS, DD, " calling DefineFile from TrieRead (1)"); |
| packed_fnum = DefineFile(string(fname), HYPH_PACKED_SUFFIX, |
| &fpos(fname), HYPH_PACKED_FILE, HYPH_PATH); |
| if( InitializeAll ) |
| { |
| /* initializing so want to reconstruct packed files */ |
| /* thanks to Ian Jackson <ian@chiark.greenend.org.uk> for this */ |
| packed_fp = NULL; |
| } |
| else |
| { |
| /* not initializing so use existing packed files if possible */ |
| packed_fp = OpenFile(packed_fnum, FALSE, FALSE); |
| } |
| |
| if( packed_fp == NULL ) |
| { |
| /* no packed file, so define and open unpacked one instead */ |
| FULL_CHAR str[MAX_BUFF], key[MAX_BUFF], value[MAX_BUFF], |
| buff[MAX_BUFF+10]; |
| int bpos, bcount; |
| debug0(DFS, DD, " calling DefineFile from TrieRead (2)"); |
| unpacked_fnum = DefineFile(string(fname), HYPH_SUFFIX, |
| &fpos(fname), HYPH_FILE, HYPH_PATH); |
| unpacked_fp = OpenFile(unpacked_fnum, FALSE, FALSE); |
| if( unpacked_fp == NULL ) |
| { Error(36, 8, "cannot open hyphenation file %s", |
| WARN, no_fpos, FileName(unpacked_fnum)); |
| *success = FALSE; |
| return (TRIE) NULL; |
| } |
| |
| /* check that first line contains magic header or stub */ |
| if( StringFGets(str, MAX_BUFF, unpacked_fp) == NULL || |
| ( !StringEqual(str, AsciiToFull("Lout hyphenation information\n")) && |
| !StringEqual(str, AsciiToFull("Lout hyphenation placeholder\n")) ) |
| ) |
| Error(36, 9, "header line of hyphenation file %s missing", |
| FATAL, no_fpos, FileName(unpacked_fnum)); |
| |
| /* if file is just a placeholder, exit silently with success */ |
| if( !StringEqual(str, AsciiToFull("Lout hyphenation information\n")) ) |
| { *success = TRUE; |
| return (TRIE) NULL; |
| } |
| |
| /* read the classes, exceptions, and patterns from the unpacked file */ |
| T = NewTrie( (unsigned) 120000, (unsigned) 32767); |
| state = START_STATE; |
| hline_num = 1; |
| length_limit = 0; |
| while( StringFGets(buff, MAX_BUFF, unpacked_fp) != NULL ) |
| { |
| hline_num++; bpos = 0; |
| while( sscanf( (char *) &buff[bpos], "%s%n", str, &bcount) == 1 && |
| str[0] != '%' ) |
| { |
| bpos += bcount; |
| DecodeEscapes(str, string(fname), hline_num); |
| |
| switch( state ) |
| { |
| case START_STATE: |
| |
| if( !StringEqual(str, AsciiToFull("Classes:")) ) |
| Error(36, 10, "Classes heading of hyphenation file %s missing", |
| FATAL, no_fpos, FileName(unpacked_fnum)); |
| state = CLASSES_STATE; |
| break; |
| |
| |
| case CLASSES_STATE: |
| |
| if( StringEqual(str, AsciiToFull("Exceptions:")) ) |
| { state = EXCEPTIONS_STATE; |
| } |
| else if( StringEqual(str, AsciiToFull("Patterns:")) ) |
| { state = PATTERNS_STATE; |
| } |
| else if( StringEqual(str, AsciiToFull("LengthLimit:")) ) |
| { state = LENGTH_LIMIT_STATE; |
| } |
| else |
| { debug1(DHY, DD, "adding class %s", str); |
| AddClassToTrie(str, T); |
| } |
| break; |
| |
| |
| case EXCEPTIONS_STATE: |
| |
| if( StringEqual(str, AsciiToFull("Patterns:")) ) |
| { state = PATTERNS_STATE; |
| } |
| else if( StringEqual(str, AsciiToFull("LengthLimit:")) ) |
| { state = LENGTH_LIMIT_STATE; |
| } |
| else |
| { prev = CH_EIGHT; j = 0; |
| key[j] = '.', value[j++] = prev, prev = CH_EIGHT; |
| for( i = 0; str[i] != '\0'; i++ ) |
| { if( str[i] == CH_HYPHEN ) prev = CH_NINE; |
| else key[j] = str[i], value[j++] = prev, prev = CH_EIGHT; |
| } |
| key[j] = '.', value[j++] = prev, prev = CH_EIGHT; |
| key[j] = '\0'; value[j] = prev; value[j+1] = '\0'; |
| if( !TrieInsert(key, value, T, string(fname), hline_num) ) |
| { |
| Error(36, 11, "hyphenation file %s%s is too large (at line %d)", |
| WARN, &fpos(fname), string(fname), HYPH_SUFFIX, hline_num); |
| *success = FALSE; |
| return (TRIE) NULL; |
| } |
| } |
| break; |
| |
| |
| case LENGTH_LIMIT_STATE: |
| |
| if( StringEqual(str, AsciiToFull("Patterns:")) ) |
| { state = PATTERNS_STATE; |
| } |
| else if( sscanf( (char *) str, "%d", &length_limit) != 1 ) |
| { |
| Error(36, 20, "bad LengthLimit in hyphenation file %s%s (line %d)", |
| WARN, &fpos(fname), string(fname), HYPH_SUFFIX, hline_num); |
| *success = FALSE; |
| return (TRIE) NULL; |
| } |
| break; |
| |
| |
| case PATTERNS_STATE: |
| |
| prev = CH_ZERO; j = 0; |
| for( i = 0; str[i] != '\0'; i++ ) |
| { if( decimaldigit(str[i]) ) prev = str[i]; |
| else key[j] = str[i], value[j++] = prev, prev = CH_ZERO; |
| } |
| key[j] = '\0'; value[j] = prev; value[j+1] = '\0'; |
| if( length_limit == 0 || j <= length_limit ) |
| { |
| debug3(DHY, DD, "TrieInsert(%s, %s, T) [%d]", key, value, ++icount); |
| if( !TrieInsert(key, value, T, string(fname), hline_num) ) |
| { |
| Error(36, 12, "hyphenation file %s%s is too large (at line %d)", |
| WARN, &fpos(fname), string(fname), HYPH_SUFFIX, hline_num); |
| *success = FALSE; |
| return (TRIE) NULL; |
| } |
| } |
| break; |
| |
| |
| default: |
| |
| assert(FALSE, "TrieRead: state"); |
| break; |
| |
| } /* end switch */ |
| } /* end while */ |
| } /* end while */ |
| |
| if( state != PATTERNS_STATE ) |
| Error(36, 13, "format error in hyphenation file %s", |
| FATAL, no_fpos, FileName(unpacked_fnum)); |
| fclose(unpacked_fp); |
| CompressTrie(T); |
| |
| /* Avoid writing and re-reading a cache file in benchmarking mode */ |
| #if 0 |
| /* write the compressed trie out to the packed file */ |
| /* cannot use FileName(packed_fnum) because path won't be right */ |
| StringCopy(buff, FileName(unpacked_fnum)); |
| StringCopy(&buff[StringLength(buff) - StringLength(HYPH_SUFFIX)], |
| HYPH_PACKED_SUFFIX); |
| packed_fp = StringFOpen(buff, WRITE_BINARY); |
| if( packed_fp == NULL ) |
| Error(36, 14, "cannot write to hyphenation file %s", FATAL,no_fpos,buff); |
| BePutInt(packed_fp, T->magic); |
| BePutInt(packed_fp, T->class_count); |
| for( i = 0; i < MAX_CHAR; i++ ) BePutChar(packed_fp, T->class[i]); |
| /* BePutInt(packed_fp, 0); */ /* placeholder for node_mem now omitted */ |
| BePutInt(packed_fp, T->node_lim); |
| BePutInt(packed_fp, T->node_free); |
| /* BePutInt(packed_fp, 0); */ /* placeholder for string_mem now omitted */ |
| BePutInt(packed_fp, T->string_lim); |
| BePutInt(packed_fp, T->string_first); |
| for( i=0; i < T->node_free; i++ ) BePutShort(packed_fp, T->node_mem[i]); |
| for( i=0; i < T->string_lim; i++) BePutChar(packed_fp, T->string_mem[i]); |
| fclose(packed_fp); |
| |
| /* free T */ |
| ifdebug(DMA, DD, DebugRegisterUsage(MEM_HYPH_PATS, 1, |
| sizeof(struct trie_rec) + 120000*sizeof(short)+32767*sizeof(char))); |
| free(T); |
| |
| /* now try again to open packed_fnum, the file just written */ |
| packed_fp = OpenFile(packed_fnum, FALSE, FALSE); |
| if( packed_fp == NULL ) |
| Error(36, 15, "cannot open hyphenation file %s", |
| FATAL, no_fpos, FileName(packed_fnum)); |
| #endif |
| } /* end if( packed_fp == NULL ) */ |
| |
| #if 0 |
| /* now packed hyphenation file is open, read it in */ |
| fseek(packed_fp, 0L, SEEK_END); |
| len = (unsigned) ftell(packed_fp); rewind(packed_fp); |
| ifdebug(DMA, DD, DebugRegisterUsage(MEM_HYPH_PATS, 1, len)); |
| /* the 2*sizeof(void*) is for the sizes of node_mem and string_mem */ |
| T = (TRIE) malloc(len + 2*sizeof(void*)); |
| if( T == (TRIE) NULL ) |
| Error(36, 16, "run out of memory while reading hyphenation table", |
| FATAL, no_fpos); |
| if( BeGetInt(packed_fp, &T->magic) != 0 ) |
| Error(36, 17, "error on read from packed hyphenation file %s", |
| FATAL, no_fpos, FileName(packed_fnum)); |
| if( T->magic != TRIE_MAGIC ) |
| Error(36, 18, "bad magic number in hyphenation file %s", |
| FATAL, no_fpos, FileName(packed_fnum)); |
| BeGetInt(packed_fp, &T->class_count); |
| for( i = 0; i < MAX_CHAR; i++ ) BeGetChar(packed_fp, &T->class[i]); |
| /* BeGetInt(packed_fp, &i); */ /* placeholder for node_mem now omitted */ |
| BeGetInt(packed_fp, &T->node_lim); |
| BeGetInt(packed_fp, &T->node_free); |
| /* BeGetInt(packed_fp, &i); */ /* placeholder for string_mem now omitted */ |
| BeGetInt(packed_fp, &T->string_lim); |
| BeGetInt(packed_fp, &T->string_first); |
| T->node_mem = (short *) ( (char *) T + sizeof(struct trie_rec) ); |
| T->string_mem = (FULL_CHAR *) &(T->node_mem[T->node_lim]); |
| for( i = 0; i < T->node_free; i++ ) BeGetShort(packed_fp, &T->node_mem[i]); |
| for( i = 0; i < T->string_lim; i++ ) BeGetChar(packed_fp, &T->string_mem[i]); |
| fclose(packed_fp); |
| #endif |
| |
| /* debug and exit */ |
| debug0(DHY, DD, "TrieRead returning, T ="); |
| *success = TRUE; |
| ifdebug(DHY, DD, TriePrint(T, stderr)); |
| return T; |
| } /* end TrieRead */ |
| |
| |
| /*****************************************************************************/ |
| /* */ |
| /* AccumulateRating(x, y) */ |
| /* */ |
| /* Accumulate the hyphenation rating string x into y. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| #define AccumulateRating(x, y) \ |
| { FULL_CHAR *p = x, *q = y; \ |
| while( *p ) \ |
| { if( *p > *q ) *q = *p; \ |
| p++, q++; \ |
| } \ |
| } /* end AccumulateRating */ |
| |
| |
| /*@::ReadHyphTable()@*********************************************************/ |
| /* */ |
| /* BOOLEAN ReadHyphTable(lnum) */ |
| /* */ |
| /* Read hyphenation table for language lnum. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| BOOLEAN ReadHyphTable(LANGUAGE_NUM lnum) |
| { BOOLEAN res; |
| debug1(DHY, DD, "ReadHyphTable(%d)", lnum); |
| assert(lnum > 0, "ReadHyphTable: lnum <= 0!"); |
| assert(HyphTables[lnum]==(TRIE) NULL && !TriedFile[lnum], "ReadHyphTable!"); |
| HyphTables[lnum] = TrieRead(lnum, &res); |
| TriedFile[lnum] = TRUE; |
| debug2(DHY, DD, "ReadHyphTable(%d) returning %s", lnum, bool(res)); |
| return res; |
| } /* end ReadHyphTable */ |
| |
| |
| /*@::Hyphenate@***************************************************************/ |
| /* */ |
| /* OBJECT Hyphenate(x) */ |
| /* */ |
| /* Hyphenate ACAT object x, returning the hyphenated result. */ |
| /* */ |
| /*****************************************************************************/ |
| |
| OBJECT Hyphenate(OBJECT x) |
| { OBJECT link, y, z, next_link; TRIE T; LANGUAGE_NUM lnum; |
| FULL_CHAR str[MAX_WORD+2], rate[MAX_WORD+3], val[MAX_WORD+3], |
| *class, *key, *ss, *s, *p, *rem, *lig, *a, *b; |
| int start, stop, i, curr_node, next_node, pos; |
| BOOLEAN hyphenated, success; |
| assert( type(x) == ACAT, "Hyphenate: type(x) != ACAT!" ); |
| debug1(DHY, D, "Hyphenate(%s)", EchoObject(x)); |
| |
| /* for each word y of x, try to hyphenate it */ |
| for( link = Down(x); link != x; link = NextDown(link) ) |
| { Child(y, link); |
| if( !is_word(type(y)) || string(y)[0] == '\0' || !word_hyph(y) ) |
| { |
| if( type(y) == GAP_OBJ && mode(gap(y)) == HYPH_MODE ) |
| nobreak(gap(y)) = FALSE; |
| continue; |
| } |
| debug1(DHY, DD, "Hyphenate() examining %s", EchoObject(y)); |
| |
| /* determine T, the trie to use */ |
| lnum = word_language(y); |
| if( lnum == 0 ) |
| Error(36, 19, "no current language for word %s", |
| FATAL, &fpos(y), string(y)); |
| T = HyphTables[lnum]; |
| |
| /* if no trie is present, try to get it from a file */ |
| if( T == (TRIE) NULL ) |
| { if( !TriedFile[lnum] ) |
| { T = HyphTables[lnum] = TrieRead(lnum, &success); |
| TriedFile[lnum] = TRUE; |
| } |
| if( T == (TRIE) NULL ) |
| { debug1(DHY, DD, "Hyphenate continuing (no trie for %s)", string(y)); |
| continue; |
| } |
| } |
| |
| /* start := index of first letter of y, stop := index following last */ |
| key = string(y); class = T->class; |
| for( start = 0; class[key[start]] == PUNCT_CLASS; start++ ); |
| for( stop = start; class[key[stop]] > PUNCT_CLASS; stop++ ); |
| |
| /* if a - ended the run, hyphenate there only */ |
| if( key[stop] == CH_HYPHEN ) |
| { |
| /* actually, don't hyphenate if the hyphen is last in the word [thanks Uwe] */ |
| if( key[stop+1] == '\0' ) |
| continue; |
| |
| next_link = NextDown(link); |
| z = MakeWord(WORD, &key[stop+1], &fpos(y)); |
| word_font(z) = word_font(y); |
| word_colour(z) = word_colour(y); |
| word_outline(z) = word_outline(y); |
| word_language(z) = word_language(y); |
| word_hyph(z) = word_hyph(y); |
| underline(z) = underline(y); |
| debug1(DHY, DD, "Hyphenate (hyph case) making fragment %s", string(z)); |
| FontWordSize(z); |
| Link(NextDown(link), z); |
| New(z, GAP_OBJ); |
| hspace(z) = vspace(z) = 0; |
| SetGap(gap(z), FALSE, FALSE, TRUE, FIXED_UNIT, HYPH_MODE, 0); |
| underline(z) = underline(y); |
| Link(NextDown(link), z); |
| Link(z, MakeWord(WORD, STR_GAP_ZERO_HYPH, &fpos(y))); |
| key[stop + 1] = '\0'; |
| FontWordSize(y); |
| /* *** link = PrevDown(next_link); */ |
| link = NextDown(link); |
| continue; |
| } |
| |
| /* do not hyphenate if less than 5 letters, or a kill char is nearby */ |
| if( stop - start < 5 ) continue; |
| if( key[stop] != '\0' && class[key[stop]] == KILL_CLASS ) continue; |
| |
| /* let str[] be the converted substring, let rate[] be all CH_ZERO */ |
| str[0] = PUNCT_CLASS; rate[0] = CH_ZERO; |
| for( i = 0; i < stop - start; i++ ) |
| { str[i+1] = class[key[start + i]]; |
| rate[i+1] = CH_ZERO; |
| } |
| str[i+1] = PUNCT_CLASS; rate[i+1] = CH_ZERO; |
| str[i+2] = '\0'; rate[i+2] = CH_ZERO; |
| rate[i+3] = '\0'; |
| ifdebug(DHY, DD, ShowRate(key, start, stop, rate, stderr)); |
| |
| /* for each suffix of str[], accumulate patterns matching its prefixes */ |
| ss = str; |
| do |
| { |
| ifdebug(DHY, DD, |
| fprintf(stderr, "trying suffix \""); |
| for( p = ss; *p != 0; p++ ) fprintf(stderr, "%c", findrep(*p, T)); |
| fprintf(stderr, "\"\n"); |
| ); |
| |
| /* accumulate all prefixes of ss */ |
| curr_node = 0; s = ss; |
| for(;;) |
| { |
| /* if curr_node has empty string, that is one prefix */ |
| pos = T->node_mem[curr_node]; |
| if( pos < 0 ) |
| { AltUncompressValue(&(T->string_mem[- pos]), val); |
| AccumulateRating(val, rate+(ss-str)); |
| debug1(DHY, DD, " found %s", val); |
| } |
| |
| /* if ss is finished, no other prefixes are possible */ |
| if( *s == '\0' ) break; |
| |
| /* determine next_node and break if empty */ |
| next_node = T->node_mem[curr_node + *s]; |
| if( next_node == 0 ) break; |
| |
| /* if next_node is a string, check whether it is a prefix of ss */ |
| if( next_node < 0 ) |
| { rem = &(T->string_mem[-next_node]); |
| do |
| { if( *rem == '\0' ) |
| { AltUncompressValue(rem+1, val); |
| AccumulateRating(val, rate+(ss-str)); |
| debug1(DHY, DD, " found %s", val); |
| break; |
| } |
| } while( *++s == *rem++ ); |
| break; |
| } |
| |
| /* otherwise go on to the next trie node */ |
| curr_node = NODE_MULT*next_node; s++; |
| } |
| } while( *(++ss + 1) != PUNCT_CLASS ); |
| ifdebug(DHY, DD, ShowRate(key, start, stop, rate, stderr)); |
| |
| /* set rate[i] to CH_ZERO whenever key[start+i-1] lies within a ligature */ |
| lig = finfo[word_font(y)].lig_table; |
| for( p = key, i = 2; *p != '\0'; p++, i++ ) |
| { if( lig[*p] > 1 ) |
| { a = &lig[ lig[*p] + MAX_CHARS ]; |
| while( *a++ == *p ) |
| { b = p+1; |
| while( *a == *b && *(a+1) != '\0' && *b != '\0' ) a++, b++; |
| if( *(a+1) == '\0' ) |
| { rate[i] = CH_ZERO; |
| break; |
| } |
| else |
| { while( *++a ); |
| a++; |
| } |
| } |
| } |
| } |
| ifdebug(DHY, DD, ShowRate(key, start, stop, rate, stderr)); |
| |
| /* now rate[] has accumulated ratings; use it to perform hyphenations */ |
| hyphenated = FALSE; |
| next_link = NextDown(link); |
| for( i = stop - start - 1; i >= 3; i-- ) |
| { |
| /* hyphenate at i if rate[i] is odd */ |
| if( is_odd(rate[i]) ) |
| { z = MakeWord(WORD, &key[start+i-1], &fpos(y)); |
| word_font(z) = word_font(y); |
| word_colour(z) = word_colour(y); |
| word_outline(z) = word_outline(y); |
| word_language(z) = word_language(y); |
| word_hyph(z) = word_hyph(y); |
| underline(z) = underline(y); |
| debug1(DHY, D, "Hyphenate making fragment %s", string(z)); |
| FontWordSize(z); |
| Link(NextDown(link), z); |
| New(z, GAP_OBJ); |
| hspace(z) = vspace(z) = 0; |
| SetGap(gap(z), FALSE, FALSE, TRUE, FIXED_UNIT, HYPH_MODE, 0); |
| underline(z) = underline(y); |
| Link(NextDown(link), z); |
| Link(z, MakeWord(WORD, STR_GAP_ZERO_HYPH, &fpos(y))); |
| key[start + i - 1] = '\0'; |
| hyphenated = TRUE; |
| } |
| } |
| if( hyphenated ) |
| { FontWordSize(y); |
| link = PrevDown(next_link); |
| } |
| |
| } /* end for each word */ |
| |
| debug3(DHY, D, "Hyphenate returning %s,%s %s", |
| EchoLength(back(x, COLM)), EchoLength(fwd(x, COLM)), EchoObject(x)); |
| return x; |
| } /* end Hyphenate */ |