blob: 8c7210414442150f07abecb1942bc4e9cd130c36 [file] [log] [blame]
/*
Grammar Grammar
*/
{
#include "gramgram.h"
#include "d.h"
}
grammar: global_code* (production global_code*)+;
global_code: curly_code {
add_global_code($g, $n0.start_loc.s+1, $n0.end-1,
$n0.start_loc.line);
}
| '${scanner' balanced_code+ '}' {
$g->scanner.code = dup_str($n1.start_loc.s, $n1.end);
$g->scanner.line = $n0.start_loc.line;
}
| '${declare' declarationtype identifier* '}' {
if (!d_get_number_of_children(&$n2))
add_declaration($g, $n2.start_loc.s, $n2.end, $1.kind, $n2.start_loc.line);
else {
int i, n = d_get_number_of_children(&$n2);
for (i = 0; i < n; i++) {
D_ParseNode *pn = d_get_child(&$n2, i);
add_declaration($g, pn->start_loc.s, pn->end, $1.kind, pn->start_loc.line);
}
}
}
| '${token' token_identifier+ '}'
| '${action}' { $g->action_index++; }
| '${pass' identifier pass_types '}' {
add_pass($g, $n1.start_loc.s, $n1.end, $2.kind, $n1.start_loc.line);
}
;
pass_types:
| pass_type pass_types { $$.kind = $0.kind | $1.kind; }
;
pass_type : 'preorder' { $$.kind |= D_PASS_PRE_ORDER; }
| 'postorder' { $$.kind |= D_PASS_POST_ORDER; }
| 'manual' { $$.kind |= D_PASS_MANUAL; }
| 'for_all' { $$.kind |= D_PASS_FOR_ALL; }
| 'for_undefined' { $$.kind |= D_PASS_FOR_UNDEFINED; }
;
declarationtype: 'tokenize' { $$.kind = DECLARE_TOKENIZE; }
| 'longest_match' { $$.kind = DECLARE_LONGEST_MATCH; }
| 'whitespace' { $$.kind = DECLARE_WHITESPACE; }
| 'all_matches' { $$.kind = DECLARE_ALL_MATCHES; }
| 'set_op_priority_from_rule' { $$.kind = DECLARE_SET_OP_PRIORITY; }
| 'all_subparsers' { $$.kind = DECLARE_STATES_FOR_ALL_NTERMS; }
| 'subparser' { $$.kind = DECLARE_STATE_FOR; }
| 'save_parse_tree' { $$.kind = DECLARE_SAVE_PARSE_TREE; }
;
token_identifier: identifier { new_token($g, $n0.start_loc.s, $n0.end); };
production : production_name ':' rules ';'
| production_name regex_production rules ';'
| ';';
regex_production : '::=' { $g->p->regex = 1; };
production_name : (identifier | '_')
{ $g->p = new_production($g, dup_str($n0.start_loc.s, $n0.end)); }
;
rules : rule ('|' rule)*;
rule : new_rule ((element element_modifier*)* simple_element element_modifier*)? rule_modifier* rule_code {
vec_add(&$g->p->rules, $g->r);
};
new_rule : { $g->r = new_rule($g, $g->p); };
simple_element
: string { $g->e = new_string($g, $n0.start_loc.s, $n0.end, $g->r); }
| regex { $g->e = new_string($g, $n0.start_loc.s, $n0.end, $g->r); }
| identifier { $g->e = new_ident($n0.start_loc.s, $n0.end, $g->r); }
| '${scan' balanced_code+ '}' { $g->e = new_code($g, $n1.start_loc.s, $n1.end, $g->r); }
| '(' new_subrule rules ')' {
$g->e = new_elem_nterm($g->p, $1.r);
$g->p = $1.p;
$g->r = $1.r;
vec_add(&$g->r->elems, $g->e);
}
;
element
: simple_element
| bracket_code {
Production *p = new_internal_production($g, NULL);
Rule *r = new_rule($g, p);
vec_add(&p->rules, r);
r->speculative_code.code = dup_str($n0.start_loc.s + 1, $n0.end - 1);
r->speculative_code.line = $n0.start_loc.line;
$g->e = new_elem_nterm(p, $g->r);
vec_add(&$g->r->elems, $g->e);
}
| curly_code {
Production *p = new_internal_production($g, NULL);
Rule *r = new_rule($g, p);
vec_add(&p->rules, r);
r->final_code.code = dup_str($n0.start_loc.s + 1, $n0.end - 1);
r->final_code.line = $n0.start_loc.line;
$g->e = new_elem_nterm(p, $g->r);
vec_add(&$g->r->elems, $g->e);
}
;
new_subrule : {
$$.p = $g->p;
$$.r = $g->r;
$g->p = new_internal_production($g, $g->p);
$g->r = new_rule($g, $g->p);
};
element_modifier
: '$term' integer {
if ($g->e->kind != ELEM_TERM)
d_fail("terminal priority on non-terminal");
$g->e->e.term->term_priority = strtol($n1.start_loc.s, NULL, 0);
}
| '/i' {
if ($g->e->kind != ELEM_TERM)
d_fail("ignore-case (/i) on non-terminal");
$g->e->e.term->ignore_case = 1;
}
| '?' { conditional_EBNF($g); }
| '*' { star_EBNF($g); }
| '+' { plus_EBNF($g); } ;
rule_modifier : rule_assoc rule_priority;
rule_assoc
: '$unary_op_right' { $g->r->op_assoc = ASSOC_UNARY_RIGHT; }
| '$unary_op_left' { $g->r->op_assoc = ASSOC_UNARY_LEFT; }
| '$binary_op_right' { $g->r->op_assoc = ASSOC_BINARY_RIGHT; }
| '$binary_op_left' { $g->r->op_assoc = ASSOC_BINARY_LEFT; }
| '$unary_right' { $g->r->rule_assoc = ASSOC_UNARY_RIGHT; }
| '$unary_left' { $g->r->rule_assoc = ASSOC_UNARY_LEFT; }
| '$binary_right' { $g->r->rule_assoc = ASSOC_BINARY_RIGHT; }
| '$binary_left' { $g->r->rule_assoc = ASSOC_BINARY_LEFT; }
| '$right' { $g->r->rule_assoc = ASSOC_NARY_RIGHT; }
| '$left' { $g->r->rule_assoc = ASSOC_NARY_LEFT; }
;
rule_priority : integer {
if ($g->r->op_assoc) $g->r->op_priority = strtol($n0.start_loc.s, NULL, 0);
else $g->r->rule_priority = strtol($n0.start_loc.s, NULL, 0);
};
rule_code : speculative_code? final_code? pass_code* ;
speculative_code : bracket_code {
$g->r->speculative_code.code = dup_str($n0.start_loc.s + 1, $n0.end - 1);
$g->r->speculative_code.line = $n0.start_loc.line;
};
final_code : curly_code {
$g->r->final_code.code = dup_str($n0.start_loc.s + 1, $n0.end - 1);
$g->r->final_code.line = $n0.start_loc.line;
};
pass_code : identifier ':' curly_code {
add_pass_code($g, $g->r, $n0.start_loc.s, $n0.end, $n2.start_loc.s+1,
$n2.end-1, $n0.start_loc.line, $n2.start_loc.line);
};
curly_code: '{' balanced_code* '}';
bracket_code: '[' balanced_code* ']';
balanced_code : '(' balanced_code* ')'
| '[' balanced_code* ']'
| '{' balanced_code* '}'
| string | identifier | regex | integer | symbols;
symbols : "[!~`@#$%^&*\-_+=|:;\\<,>.?/]";
string: "'([^'\\]|\\[^])*'";
regex: "\"([^\"\\]|\\[^])*\"";
identifier: "[a-zA-Z_][a-zA-Z_0-9]*" $term -1;
integer: decimalint | hexint | octalint;
decimalint: "-?[1-9][0-9]*[uUlL]?";
hexint: "-?(0x|0X)[0-9a-fA-F]+[uUlL]?";
octalint: "-?0[0-7]*[uUlL]?";