#!/usr/bin/perl
#
# Copyright (C) 2002 eXtensible Systems, Inc. All Rights Reserved
#
# This program is open source software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (GPL) as published by
# the Free Software Foundation; either version 2 of the License, or (at your
# option) any later version. You should have received a copy of the GPL in a
# file named COPYING that was included with this program; if not, you can
# obtain a copy of the license through the Internet at http://www.fsf.org/
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#-------------------------------------------------------------------------------
#
# This script will extract the identifier names from a RelaxNG schema. The
# identifier names form the token set that acts as the terminals of the
# grammar. We make parsing efficient by generating a perfect hash function
# with gperf from the set of token identifiers.
#
# Identifier names are X in the following schema constructs:
# <element name="X">
# <attribute name="X">
# <value>X</value>
#
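# Usage:
#   mkTokenizer [-f] <schema_file> <hlvm_src_root>
#
# The -f flag is optional. When given, the checks that refuse to run because
# an output file already exists are skipped.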
#
use FindBin;
use lib $FindBin::Bin;
use mkFuncs;
use File::Copy;
# Command-line handling and derivation of every file name the script uses.
#
# NOTE(review): the variables below are deliberately left as package globals
# (no "my"); helpers imported from mkFuncs may read some of them by name --
# confirm before converting any of them to lexicals.

# Command line: an optional leading -f followed by the schema file name.
$SchemaFile = shift;
if (defined($SchemaFile) && $SchemaFile eq "-f") {
    # -f skips the "output file already exists" checks further down.
    $Force = 1;
    $SchemaFile = shift;
}

# These are string tests on purpose: the -z operator is the "file has zero
# size" file test, so it cannot detect a missing or empty argument.
die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
    if !defined($SchemaFile) || $SchemaFile eq "";

# get_hlvm_dir() is provided by mkFuncs; presumably it determines the HLVM
# source root (possibly from the remaining arguments) -- TODO confirm.
$HLVM_root = get_hlvm_dir();
die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
    if !defined($HLVM_root) || $HLVM_root eq "";

# Get the plain old schema name from the file name: drop a trailing ".rng"
# (anchored, so a ".rng" inside a directory name is left alone) and then any
# leading directory components.
$Schema = $SchemaFile;
$Schema =~ s/\.rng$//;
$Schema = substr($Schema, rindex($Schema, '/') + 1);

# Template inputs live under the HLVM root; generated files are named after
# the schema and are relative to the current directory.
$PreambleFile   = $HLVM_root . "/utils/tmplt/Preamble_Code";
$HeaderTemplate = $HLVM_root . "/utils/tmplt/Tokenizer_Template.h";
$HeaderFile     = $Schema . "Tokenizer.h";
$SourceTemplate = $HLVM_root . "/utils/tmplt/Tokenizer_Template.cpp";
$SourceFile     = $Schema . "Tokenizer.cpp";
$TokenHashClass = $Schema . "TokenHash";
$TokenHashFile  = $TokenHashClass . ".i";

# Check the validity of the files we use/create
die "Invalid schema file name" if ! -e "$SchemaFile";
die "ERROR: '$PreambleFile' doesn't exist" if ! -e "$PreambleFile";
if (!$Force) {
    # Without -f, refuse to clobber any previously generated file.
    die "ERROR: '$HeaderFile' exists" if -e "$HeaderFile";
    die "ERROR: '$SourceFile' exists" if -e "$SourceFile";
    die "ERROR: '$TokenHashFile' exists" if -e "$TokenHashFile";
}
# Return the distinct values of the argument list, sorted.
#
# Parameters: the list of strings to de-duplicate (may be empty).
# Returns:    a list containing each distinct argument exactly once, ordered
#             by Perl's default (string comparison) sort.
#
# Only lexical variables are used, so calling this sub does not disturb any
# package global (the loop index used to be the global $i).
sub sortUnique
{
    my %seen;
    # Keep an element only the first time it is encountered.
    my @unique = grep { !$seen{$_}++ } @_;
    return sort @unique;
}
# Collect the identifier names used in a RelaxNG schema file.
#
# The file is scanned line by line for X in these constructs:
#   <element ... name="X">
#   <attribute ... name="X">
#   <value>X
# Matching is per line, so a construct split across lines is not recognised.
#
# Parameters: $fname - path of the schema file to read.
# Returns:    the distinct, non-empty names found, as produced by sortUnique().
# Dies:       if the file cannot be opened.
sub getTokens
{
    my ($fname) = @_;
    my @tokens;

    # Three-argument open with a lexical handle: the file name is taken
    # literally and the handle cannot collide with another SCHEMA handle.
    open(my $schema, '<', $fname)
        or die "Couldn't open $fname for reading: $!\n";
    while (defined(my $line = <$schema>))
    {
        while ($line =~ /<element[^>]*name="([^"]*)"/g) {
            push @tokens, $1;
        }
        while ($line =~ /<attribute[^>]*name="([^"]*)"/g) {
            push @tokens, $1;
        }
        while ($line =~ /<value>\s*([^<\s]*)/g) {
            push @tokens, $1;
        }
    }
    close $schema;

    # The patterns above can capture an empty string (name="" or a <value>
    # with nothing after it on the line). An empty name cannot be used as a
    # token identifier or hash keyword, so drop it here.
    return sortUnique(grep { length } @tokens);
}
# Extract the terminal tokens from the schema file
my @tokens = getTokens($SchemaFile);

# Set up a gperf invocation to convert the token list into a perfect hash
# function. The command goes through the shell (it contains a redirection), so
# a missing gperf is normally only reported when the pipe is closed.
open(my $gperf, "| gperf -tcDCIoGl --fast 0 -L C++ -Z $TokenHashClass -s 2 -S 1 -k '*' > $TokenHashFile")
    or die "ERROR: couldn't start gperf: $!\n";

# Run the input through GPERF to create the perfect hash function
# NOTE(review): $hlvmdir is not used again in this file; the call is kept in
# case get_hlvm_dir() has side effects or mkFuncs reads the global -- confirm.
$hlvmdir = get_hlvm_dir();

# Derive the module name from the part of the current directory that follows
# the last "/hlvm/", with slashes turned into underscores.
# NOTE(review): if the path contains no "/hlvm/", rindex() returns -1 and the
# first five characters are silently dropped instead -- verify the callers
# always run this from inside the hlvm tree.
chomp($ModulePath = `pwd`);
$ModulePath = substr($ModulePath, rindex($ModulePath, "/hlvm/") + 6);
$Module = $ModulePath;
$Module =~ s|\/|_|g;

# gperf input: struct declaration, "%%", one keyword line per token, "%%".
print {$gperf} "struct TokenMap {\n";
print {$gperf} "const char *name; HLVM_${Module}::${Schema}Tokens token;\n";
print {$gperf} "};\n%%\n";
for my $token (@tokens) {
    print {$gperf} "\"$token\", HLVM_${Module}::TKN_$token,\n";
}
print {$gperf} "%%\n";

# Closing a piped handle waits for the child; a false result means either a
# system error ($! set) or a non-zero exit status from the command ($?).
close($gperf)
    or die($! ? "ERROR: closing the pipe to gperf failed: $!\n"
              : "ERROR: gperf exited with status " . ($? >> 8) . "\n");

# Generate the header file for the tokenizer, starting it with the preamble for
# C++ source files
# NOTE(review): $TOKEN_LIST and $SCHEMA_NAME are not read in this file;
# presumably process_file() (from mkFuncs) substitutes them into the
# templates -- confirm against mkFuncs.
$TOKEN_LIST = "TKN_" . join(",\n TKN_", @tokens) . ",";
$SCHEMA_NAME = $Schema;
process_file($PreambleFile, $HeaderTemplate, $HeaderFile);
process_file($PreambleFile, $SourceTemplate, $SourceFile);