blob: 971d2f441e8425f7eb9fce69c1d12b6ecf61889d [file] [log] [blame]
/*
Copyright 2002-2003 John Plevyak, All Rights Reserved
*/
#ifndef _gram_H_
#define _gram_H_
/* String literal containing the single byte 0xFF (octal escape 377).  By its
   name an end-of-input sentinel -- NOTE(review): confirm where it is used.
   ("SENTINAL" is a misspelling of "sentinel"; the macro name is public and is
   left unchanged.) */
#define EOF_SENTINAL "\377"
/* All-ones 32-bit constant.  By its name it marks "no production" -- TODO
   confirm which field(s) it is stored in or compared against. */
#define NO_PROD 0xFFFFFFFF
/* Forward declarations, so the structures below can hold pointers to one
   another before their full definitions appear.  Production, Rule, Elem, Term
   and State are defined later in this header; ScanState, ScanStateTransition
   and D_ParserTables are only named here and are defined elsewhere. */
struct Production;
struct Rule;
struct Elem;
struct Term;
struct State;
struct ScanState;
struct ScanStateTransition;
struct D_ParserTables;
/* Item is simply another name for struct Elem. */
typedef struct Elem Item;
/* A piece of code text paired with a line number.
   NOTE(review): `line` is presumably the line of the grammar file the text
   came from -- confirm against the code that fills it in. */
typedef struct Code {
char *code;
int line;
} Code;
/* Pairs a grammar element with a parser state.  By its name this is a goto
   transition (on `elem`, to `state`) -- TODO confirm the direction against
   the code that builds State.gotos. */
typedef struct Goto {
struct Elem *elem;
struct State *state;
} Goto;
/* Vector of Goto pointers.  Vec() is a macro that is not defined in this
   header; it must be provided by a header included before this one. */
typedef Vec(Goto *) VecGoto;
/* Discriminator stored in Action.kind.  Values are assigned implicitly,
   starting at 0, in the order listed. */
typedef enum ActionKind {
  ACTION_ACCEPT,
  ACTION_SHIFT,
  ACTION_REDUCE
} ActionKind;
/* One parser action of the given kind.  State keeps vectors of these in its
   shift_actions and reduce_actions members.
   NOTE(review): which of term / rule / state is meaningful presumably depends
   on `kind` (e.g. `rule` for ACTION_REDUCE, `state` for ACTION_SHIFT) --
   confirm against the code that creates actions. */
typedef struct Action {
ActionKind kind;
struct Term *term;
struct Rule *rule;
struct State *state;
uint index;
char *temp_string; /* NOTE(review): purpose is not visible in this header */
} Action;
/* Vector of Action pointers. */
typedef Vec(Action *) VecAction;
/* A (depth, state, rule) triple.  State stores vectors of these as
   right_epsilon_hints and error_recovery_hints.  How `depth` is measured is
   not visible here -- TODO document from the code that fills the hints in. */
typedef struct Hint {
uint depth;
struct State *state;
struct Rule *rule;
} Hint;
/* Vector of Hint pointers. */
typedef Vec(Hint *) VecHint;
/* Vectors of pointers to scanner transitions and scanner states.  Both
   struct types are only forward-declared in this header. */
typedef Vec(struct ScanStateTransition*) VecScanStateTransition;
typedef Vec(struct ScanState *) VecScanState;
/* Scanner data: a vector of states and a vector of transitions.  Every State
   embeds one Scanner by value. */
typedef struct Scanner {
VecScanState states;
VecScanStateTransition transitions;
} Scanner;
/* One parser state: its items together with the gotos, shift/reduce actions,
   hints and scanner that belong to it. */
typedef struct State {
uint index;
uint64 hash;
Vec(Item*) items;
Vec(Item*) items_hash; /* NOTE(review): presumably a hashed view of `items` -- confirm */
VecGoto gotos;
VecAction shift_actions;
VecAction reduce_actions;
VecHint right_epsilon_hints;
VecHint error_recovery_hints;
Scanner scanner;
/* single-bit flags */
uint accept:1;
uint scanner_code:1;
uint goto_on_token:1;
/* NOTE(review): 2 bits wide here, whereas Term.scan_kind is 3 bits wide --
   confirm that every value assigned to this field fits in 2 bits. */
uint scan_kind:2;
uint8 *goto_valid;
int goto_table_offset;
/* Links to other states and rules.  Their exact meaning is not visible in
   this header -- TODO document from the code that sets them. */
struct State *same_shifts;
struct State *reduces_to;
struct Rule *reduces_with;
struct Rule *reduces_to_then_with;
} State;
/*
 * Associativity flags.  Each named AssocKind value is ASSOC_NONE, ASSOC_NO,
 * or one arity bit (NARY / UNARY / BINARY) OR-ed with one direction bit
 * (LEFT / RIGHT).
 */
#define ASSOC_LEFT 0x0001
#define ASSOC_RIGHT 0x0002
#define ASSOC_NARY 0x0004
#define ASSOC_UNARY 0x0008
#define ASSOC_BINARY 0x0010
typedef enum AssocKind {
  ASSOC_NONE = 0,
  ASSOC_NARY_LEFT = (ASSOC_NARY|ASSOC_LEFT),
  ASSOC_NARY_RIGHT = (ASSOC_NARY|ASSOC_RIGHT),
  ASSOC_UNARY_LEFT = (ASSOC_UNARY|ASSOC_LEFT),
  ASSOC_UNARY_RIGHT = (ASSOC_UNARY|ASSOC_RIGHT),
  ASSOC_BINARY_LEFT = (ASSOC_BINARY|ASSOC_LEFT),
  ASSOC_BINARY_RIGHT = (ASSOC_BINARY|ASSOC_RIGHT),
  /* NOTE(review): carries neither an arity nor a direction bit; presumably
     "explicitly non-associative" -- confirm. */
  ASSOC_NO = 0x0020
} AssocKind;
/* Single-flag tests.  Each yields the masked bit itself (non-zero when the
   flag is set), not a normalised 0/1. */
#define IS_RIGHT_ASSOC(_x) ((_x) & ASSOC_RIGHT)
#define IS_LEFT_ASSOC(_x) ((_x) & ASSOC_LEFT)
#define IS_NARY_ASSOC(_x) ((_x) & ASSOC_NARY)
#define IS_BINARY_ASSOC(_x) ((_x) & ASSOC_BINARY)
#define IS_UNARY_ASSOC(_x) ((_x) & ASSOC_UNARY)
/* Two-flag tests; these yield 0 or 1 and evaluate _x twice. */
#define IS_UNARY_BINARY_ASSOC(_x) (IS_BINARY_ASSOC(_x) || IS_UNARY_ASSOC(_x))
#define IS_BINARY_NARY_ASSOC(_x) (IS_BINARY_ASSOC(_x) || IS_NARY_ASSOC(_x))
/* not valid for NARY */
/* True for any non-zero kind other than ASSOC_UNARY_LEFT (resp.
   ASSOC_UNARY_RIGHT).  _x is evaluated twice.  Every use of _x is
   parenthesised so that an expression argument such as
   (ASSOC_UNARY | ASSOC_RIGHT) is compared as a whole rather than being split
   by operator precedence. */
#define IS_EXPECT_RIGHT_ASSOC(_x) ((_x) && (_x) != ASSOC_UNARY_LEFT)
#define IS_EXPECT_LEFT_ASSOC(_x) ((_x) && (_x) != ASSOC_UNARY_RIGHT)
/* One rule of a production (Production.rules holds pointers to these). */
typedef struct Rule {
uint index;
struct Production *prod; /* NOTE(review): presumably the production this rule belongs to -- confirm */
/* Two priority/associativity pairs, one named "op" and one named "rule";
   how they differ is not visible in this header -- TODO document. */
int op_priority;
AssocKind op_assoc;
int rule_priority;
AssocKind rule_assoc;
Vec(struct Elem*) elems; /* the rule's elements; last_elem() reads the final one */
struct Elem *end;
/* Code attached to the rule: two fixed slots plus a vector of per-pass code
   (see add_pass_code()). */
Code speculative_code;
Code final_code;
Vec(Code*) pass_code;
int action_index;
struct Rule *same_reduction;
} Rule;
/* Discriminator stored in Term.kind.  Values are assigned implicitly,
   starting at 0, in the order listed. */
typedef enum TermKind {
  TERM_STRING,
  TERM_REGEX,
  TERM_CODE,
  TERM_TOKEN
} TermKind;
/* A terminal of the grammar (Grammar.terminals holds pointers to these);
   `kind` says which TermKind it is. */
typedef struct Term {
TermKind kind;
uint index;
int term_priority;
AssocKind op_assoc;
int op_priority;
char *string;
int string_len; /* NOTE(review): presumably the length of `string` -- confirm */
uint scan_kind:3; /* 3 bits wide; note State.scan_kind is only 2 bits wide */
uint ignore_case:1;
/* NOTE(review): presumably the counterpart of Production.regex_term --
   confirm. */
struct Production *regex_production;
} Term;
/* Vector of Term pointers. */
typedef Vec(Term *) TermVec;
/* Kinds of grammar declaration.  Values are assigned implicitly from 0, so
   DECLARE_NUM, being last, equals the number of kinds before it; Production
   uses it as the length of its per-kind arrays. */
typedef enum DeclarationKind {
  DECLARE_TOKENIZE,
  DECLARE_LONGEST_MATCH,
  DECLARE_ALL_MATCHES,
  DECLARE_SET_OP_PRIORITY,
  DECLARE_STATES_FOR_ALL_NTERMS,
  DECLARE_STATE_FOR,
  DECLARE_WHITESPACE,
  DECLARE_SAVE_PARSE_TREE,
  DECLARE_NUM /* count of the kinds above; must stay last */
} DeclarationKind;
/* A declaration attached to a grammar element (Grammar.declarations holds
   pointers to these).  `kind` is a plain unsigned int; presumably it carries
   a DeclarationKind value -- TODO confirm. */
typedef struct Declaration {
struct Elem * elem;
uint kind;
uint index;
} Declaration;
/* Kinds of internal production.  Values are assigned implicitly, starting at
   0, in the order listed; the largest (4) fits the 3-bit Production.internal
   bit-field. */
typedef enum InternalKind {
  INTERNAL_NOT,
  INTERNAL_HIDDEN,
  INTERNAL_CONDITIONAL,
  INTERNAL_STAR,
  INTERNAL_PLUS
} InternalKind;
/* A production (non-terminal) of the grammar together with its rules. */
typedef struct Production {
char *name;
uint name_len; /* NOTE(review): presumably the length of `name` -- confirm */
Vec(Rule *) rules;
uint index;
uint regex:1;
uint in_regex:1;
uint internal:3; /* production used for EBNF; presumably holds an InternalKind -- confirm */
Rule *nullable; /* shortest rule for epsilon reduction */
/* Both arrays have DECLARE_NUM entries, i.e. one slot per DeclarationKind. */
struct Production *declaration_group[DECLARE_NUM];
struct Declaration *last_declaration[DECLARE_NUM];
State *state; /* state for independent parsing of this production */
struct Elem *elem; /* base elem for the item set of the above state */
struct Term *regex_term; /* regex production terminal */
struct Production *next;
} Production;
/* Discriminator stored in Elem.kind.  Values are assigned implicitly,
   starting at 0, in the order listed. */
typedef enum ElemKind {
  ELEM_NTERM,
  ELEM_TERM,
  ELEM_UNRESOLVED,
  ELEM_END
} ElemKind;
/* One element of a rule (also known as Item, see the typedef near the top of
   this header).
   NOTE(review): `kind` presumably selects which member of union `e` is live
   (nterm for ELEM_NTERM, term for ELEM_TERM, unresolved for ELEM_UNRESOLVED)
   -- confirm against the code that reads it. */
typedef struct Elem {
ElemKind kind;
uint index;
Rule *rule;
union {
Production *nterm;
Term *term;
void *term_or_nterm; /* shares storage with nterm and term, as an untyped pointer */
struct Unresolved {
char *string;
uint len;
} unresolved;
} e;
} Elem;
/* Everything held for one grammar: its productions, terminals, states, code,
   declarations and passes, followed by option flags and scratch fields. */
typedef struct Grammar {
char *pathname;
Vec(Production *) productions;
Vec(Term *) terminals;
Vec(State *) states;
Code scanner;
/* NOTE(review): code/ncode look like an array and its element count,
   presumably filled by add_global_code() -- confirm. */
Code *code;
int ncode;
Vec(Declaration *) declarations;
Vec(D_Pass *) passes; /* D_Pass is not declared in this header */
char *default_white_space;
/* grammar construction options */
/* NOTE(review): the comment placed just after this struct ("automatically
   add %op_XXX ...") appears to describe set_op_priority_from_rule. */
int set_op_priority_from_rule;
int right_recursive_BNF;
int states_for_whitespace;
int states_for_all_nterms;
int tokenizer;
int longest_match;
int save_parse_tree;
/* grammar writing options */
char grammar_ident[256];
int scanner_blocks;
int scanner_block_size;
int write_line_directives;
int write_header;
int token_type;
/* temporary variables for grammar construction */
/* p, r and e are the production, rule and element that the *_EBNF()
   functions declared below are applied to. */
struct Production * p;
struct Rule * r;
struct Elem * e;
int action_index;
int action_count;
int pass_index;
int rule_index;
} Grammar;
/* NOTE(review): the remark below does not describe any of the functions that
   follow; it reads like documentation for Grammar.set_op_priority_from_rule.
   Confirm and move it next to that field:
   automatically add %op_XXX to rightmost token of %XXX rule, default off */
/* Grammar lifecycle, build, parse and print entry points.  Only prototypes
   are visible here; return-value conventions and ownership of `pathname` are
   not -- TODO document from the implementation. */
Grammar *new_D_Grammar(char *pathname);
void free_D_Grammar(Grammar *g);
int build_grammar(Grammar *g);
/* D_ParserTables is used here as a plain type name; this header only
   forward-declares the struct tag, so in C the typedef must already be
   visible from another header. */
int parse_grammar(Grammar *g, D_ParserTables *t, int sizeof_ParseNode_User);
void print_grammar(Grammar *g);
void print_states(Grammar *g);
void print_rule(Rule *r);
void print_term(Term *t);
/* Looks a production up by a name given as pointer plus length.
   NOTE(review): the result when nothing matches is not visible here
   (presumably NULL) -- confirm. */
Production *lookup_production(Grammar *g, char *name, int len);
/* for creating grammars */
/* Last entry of rule _r's `elems` vector.  _r is evaluated twice, so pass an
   expression without side effects.  The index is n-1, so the vector must not
   be empty. */
#define last_elem(_r) ((_r)->elems.v[(_r)->elems.n-1])
/* Constructors for rules, elements, declarations and productions.
   NOTE(review): the (s, e) and (start, end) pointer pairs taken by several
   functions below presumably delimit a span of grammar text -- confirm, and
   confirm whether the end pointer is inclusive or one-past-the-end. */
Rule *new_rule(Grammar *g, Production *p);
Elem *new_elem_nterm(Production *p, Rule *r);
void new_declaration(Grammar *g, Elem *e, uint kind);
Production *new_production(Grammar *g, char *name);
Elem *new_string(Grammar *g, char *s, char *e, Rule *r);
Elem *new_ident(char *s, char *e, Rule *r);
void new_token(Grammar *g, char *s, char *e);
Elem *new_code(Grammar *g, char *s, char *e, Rule *r);
void add_global_code(Grammar *g, char *start, char *end, int line);
Production *new_internal_production(Grammar *g, Production *p);
Elem * dup_elem(Elem *e, Rule *r);
void add_declaration(Grammar *g, char *start, char *end, uint kind, uint line);
/* Pass handling: register a pass, attach per-pass code to a rule, and look a
   pass up.  D_Pass is declared in another header. */
void add_pass(Grammar *g, char *start, char *end, uint kind, uint line);
void add_pass_code(Grammar *g, Rule *r, char *pass_start, char *pass_end,
char *code_start, char *code_end, uint line, uint pass_line);
D_Pass *find_pass(Grammar *g, char *start, char *end);
/* EBNF helpers.  Each works on the grammar's current element, rule and
   production, i.e. the temporary fields g->e, g->r and g->p.
   NOTE(review): by their names and the INTERNAL_CONDITIONAL / INTERNAL_STAR /
   INTERNAL_PLUS kinds they presumably implement the optional, zero-or-more
   and one-or-more constructs -- confirm. */
void conditional_EBNF(Grammar *g); /* applied to g->e,g->r,g->p */
void star_EBNF(Grammar *g); /* same fields as conditional_EBNF */
void plus_EBNF(Grammar *g); /* same fields as conditional_EBNF */
/* Set-up and completion steps for the grammar's productions; what each one
   does is not visible in this header -- TODO document from the
   implementation. */
void initialize_productions(Grammar *g);
void finalize_productions(Grammar *g);
/* NOTE(review): `iproduction` is presumably an index into g->productions, and
   the meaning of the int result is not visible here -- confirm both. */
int state_for_declaration(Grammar *g, int iproduction);
#endif