hlvm/utils/bin/mkTokenizer - llvm-archive - Git at Google

 #!/usr/bin/perl
 #
 # Copyright (C) 2002 eXtensible Systems, Inc. All Rights Reserved
 #
 # This program is open source software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License (GPL) as published by
 # the Free Software Foundation; either version 2 of the License, or (at your
 # option) any later version. You should have received a copy of the GPL in a
 # file named COPYING that was included with this program; if not, you can
 # obtain a copy of the license through the Internet at http://www.fsf.org/
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 # for more details.
 #-------------------------------------------------------------------------------
 #
 # This script will extract the identifier names from a RelaxNG schema. The
 # identifier names form the token set that acts as terminals in the grammar. We
 # make parsing efficient by generating a perfect hash function with gperf from
 # the set of token identifiers.
 #
 # Identifier names are X in the following schema constructs:
 #    <element name="X">
 #    <attribute name="X">
 #    <value>X</value>
 #
 # Usage:
 #   mkTokenizer [-f] <schema_file> <hlvm_src_root>
 #
 #   -f   skip the check that refuses to run when an output file already exists
 #
 use FindBin;
 use lib $FindBin::Bin;
 use mkFuncs;
 use File::Copy;

 # Parse the command line: an optional -f flag followed by the schema file.
 $SchemaFile = shift;
 if (defined($SchemaFile) && $SchemaFile eq "-f") {
   $Force = 1;
   $SchemaFile = shift;
 }

 # Note: Perl's -z is a file test ("exists and has zero size"), not the shell's
 # "string is empty" test, so missing arguments are checked explicitly here.
 die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
   if !defined($SchemaFile) || $SchemaFile eq "";
 $HLVM_root = get_hlvm_dir();
 die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
   if !defined($HLVM_root) || $HLVM_root eq "";

 # Derive the plain schema name from the file name: drop any directory part
 # first, then a trailing ".rng" extension. Doing it in this order (and
 # anchoring the extension) keeps a ".rng" that appears in a directory name or
 # in the middle of the file name from being removed by mistake.
 $Schema = substr($SchemaFile,rindex($SchemaFile,'/')+1);
 $Schema =~ s/\.rng\z//;

 # Template inputs live under the HLVM source root; generated files are named
 # after the schema and are created in the current directory.
 $PreambleFile = $HLVM_root . "/utils/tmplt/Preamble_Code";
 $HeaderTemplate = $HLVM_root . "/utils/tmplt/Tokenizer_Template.h";
 $HeaderFile = $Schema . "Tokenizer.h";
 $SourceTemplate = $HLVM_root . "/utils/tmplt/Tokenizer_Template.cpp";
 $SourceFile = $Schema . "Tokenizer.cpp";
 $TokenHashClass = $Schema . "TokenHash";
 $TokenHashFile = $TokenHashClass . ".i";

 # Check the validity of the files we use/create
 die "Invalid schema file name" if ! -e "$SchemaFile";
 die "ERROR: '$PreambleFile' doesn't exist" if ! -e "$PreambleFile";
 if (!$Force) {
   # Without -f, never overwrite a previously generated file.
   die "ERROR: '$HeaderFile' exists" if -e "$HeaderFile";
   die "ERROR: '$SourceFile' exists" if -e "$SourceFile";
   die "ERROR: '$TokenHashFile' exists" if -e "$TokenHashFile";
 }

 # Helper subroutines used below to extract the token names from the schema file

 # Return the distinct values of the argument list, sorted.
 #
 # Arguments: a list of strings (may be empty, may contain duplicates).
 # Returns:   the list with duplicates removed, ordered by Perl's default
 #            (string) sort comparison.
 sub sortUnique
 {
   my %seen;
   # Keep only the first occurrence of each value. All state is lexical, so
   # calling this routine does not disturb any package variables.
   my @unique = grep { !$seen{$_}++ } @_;
   return sort @unique;
 }

 # Collect the token identifiers used by a RelaxNG schema file.
 #
 # Each line of the file is scanned for three constructs, and X is recorded:
 #    <element ... name="X"
 #    <attribute ... name="X"
 #    <value>X
 # Matching is done one line at a time, so a construct that is split across
 # lines is not recognised.
 #
 # Arguments: $fname - path of the schema file to read.
 # Returns:   the sorted, de-duplicated list of identifiers (via sortUnique).
 # Dies if the file cannot be opened.
 sub getTokens
 {
   my $fname = shift(@_);
   my @tokens;

   # Three-argument open with a lexical handle: the file name is never
   # interpreted as part of the open mode, and the handle is private to this sub.
   open(my $schema, '<', $fname)
     || die "Couldn't open $fname for reading: $!\n";

   while ( defined(my $line = <$schema>) )
   {
     # In list context a /g match returns every captured X on the line.
     push @tokens, $line =~ /<element[^>]*name="([^"]*)"/g;
     push @tokens, $line =~ /<attribute[^>]*name="([^"]*)"/g;
     push @tokens, $line =~ /<value>\s*([^<\s]*)/g;
   }
   close $schema;

   # The patterns can capture an empty string (for example a line that ends
   # right after "<value>"). An empty name cannot form a token identifier, so
   # such captures are dropped.
   return sortUnique(grep { $_ ne "" } @tokens);
 }

 # Extract the terminal tokens from the schema file
 my @tokens = getTokens($SchemaFile);

 # Work out the module name from the current directory: everything after the
 # last "/hlvm/" path component, with the remaining slashes turned into
 # underscores. This is done before gperf is started so that a failure here
 # does not leave a partially written hash file behind.
 $hlvmdir = get_hlvm_dir();  # not used below; kept in case mkFuncs reads it -- TODO confirm
 chomp($ModulePath = `pwd`);
 my $hlvm_pos = rindex($ModulePath,"/hlvm/");
 die "ERROR: current directory '$ModulePath' is not below an hlvm/ directory\n"
   if $hlvm_pos < 0;
 $ModulePath = substr($ModulePath,$hlvm_pos+6);
 $Module = $ModulePath;
 $Module =~ s|\/|_|g;

 # Set up a gperf invocation to convert the token list into a perfect hash
 # function
 open(my $gperf,"| gperf -tcDCIoGl --fast 0 -L C++ -Z $TokenHashClass -s 2 -S 1 -k '*' > $TokenHashFile")
   || die "ERROR: couldn't start gperf: $!\n";

 # Run the input through GPERF to create the perfect hash function
 print {$gperf} "struct TokenMap {\n";
 print {$gperf} "const char *name; HLVM_$Module"."::".$Schema."Tokens token;\n" ;
 print {$gperf} "};\n%%\n" ;
 print {$gperf} "\"$_\", HLVM_".$Module."::TKN_".$_.",\n" foreach @tokens;
 print {$gperf} "%%\n";

 # Closing a pipe waits for the child; a false result means either an I/O
 # error ($! set) or a non-zero exit status from the command (in $?).
 unless (close($gperf)) {
   die "ERROR: couldn't close pipe to gperf: $!\n" if $!;
   die "ERROR: gperf exited with status " . ($? >> 8) . "\n";
 }

 # Generate the header file for the tokenizer, starting it with the preamble for
 # C++ source files
 # $TOKEN_LIST and $SCHEMA_NAME are package variables; presumably process_file
 # (from mkFuncs) substitutes them into the templates -- verify against mkFuncs.
 $TOKEN_LIST = "TKN_" . join(",\n    TKN_",@tokens) . ",";
 $SCHEMA_NAME = $Schema;
 process_file($PreambleFile,$HeaderTemplate,$HeaderFile);
 process_file($PreambleFile,$SourceTemplate,$SourceFile);
	#!/usr/bin/perl
	#
	# Copyright (C) 2002 eXtensible Systems, Inc. All Rights Reserved
	#
	# This program is open source software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License (GPL) as published by
	# the Free Software Foundation; either version 2 of the License, or (at your
	# option) any later version. You should have received a copy of the GPL in a
	# file named COPYING that was included with this program; if not, you can
	# obtain a copy of the license through the Internet at http://www.fsf.org/
	#
	# This program is distributed in the hope that it will be useful, but
	# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	# for more details.
	#-------------------------------------------------------------------------------
	#
	# This script will extract the identifier names from a RelaxNG schema. The
	# identifier names form the token set that acts as terminals in the grammar. We
	# make parsing efficient by generating a perfect hash function with gperf from
	# the set of token identifiers.
	#
	# Identifier names are X in the following schema constructs:
	# <element name="X">
	# <attribute name="X">
	# <value>X</value>
	#
	# Usage:
	#   mkTokenizer [-f] <schema_file> <hlvm_src_root>
	#
	#   -f   skip the check that refuses to run when an output file already exists
	#
	use FindBin;
	use lib $FindBin::Bin;
	use mkFuncs;
	use File::Copy;

	# Parse the command line: an optional -f flag followed by the schema file.
	$SchemaFile = shift;
	if (defined($SchemaFile) && $SchemaFile eq "-f") {
	  $Force = 1;
	  $SchemaFile = shift;
	}

	# Note: Perl's -z is a file test ("exists and has zero size"), not the shell's
	# "string is empty" test, so missing arguments are checked explicitly here.
	die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
	  if !defined($SchemaFile) || $SchemaFile eq "";
	$HLVM_root = get_hlvm_dir();
	die "USAGE: $0 <schema_file> <hlvm_src_root>\n"
	  if !defined($HLVM_root) || $HLVM_root eq "";

	# Derive the plain schema name from the file name: drop any directory part
	# first, then a trailing ".rng" extension. Doing it in this order (and
	# anchoring the extension) keeps a ".rng" that appears in a directory name or
	# in the middle of the file name from being removed by mistake.
	$Schema = substr($SchemaFile,rindex($SchemaFile,'/')+1);
	$Schema =~ s/\.rng\z//;

	# Template inputs live under the HLVM source root; generated files are named
	# after the schema and are created in the current directory.
	$PreambleFile = $HLVM_root . "/utils/tmplt/Preamble_Code";
	$HeaderTemplate = $HLVM_root . "/utils/tmplt/Tokenizer_Template.h";
	$HeaderFile = $Schema . "Tokenizer.h";
	$SourceTemplate = $HLVM_root . "/utils/tmplt/Tokenizer_Template.cpp";
	$SourceFile = $Schema . "Tokenizer.cpp";
	$TokenHashClass = $Schema . "TokenHash";
	$TokenHashFile = $TokenHashClass . ".i";

	# Check the validity of the files we use/create
	die "Invalid schema file name" if ! -e "$SchemaFile";
	die "ERROR: '$PreambleFile' doesn't exist" if ! -e "$PreambleFile";
	if (!$Force) {
	  # Without -f, never overwrite a previously generated file.
	  die "ERROR: '$HeaderFile' exists" if -e "$HeaderFile";
	  die "ERROR: '$SourceFile' exists" if -e "$SourceFile";
	  die "ERROR: '$TokenHashFile' exists" if -e "$TokenHashFile";
	}

	# Helper subroutines used below to extract the token names from the schema file

	# Return the distinct values of the argument list, sorted.
	#
	# Arguments: a list of strings (may be empty, may contain duplicates).
	# Returns:   the list with duplicates removed, ordered by Perl's default
	#            (string) sort comparison.
	sub sortUnique
	{
	  my %seen;
	  # Keep only the first occurrence of each value. All state is lexical, so
	  # calling this routine does not disturb any package variables.
	  my @unique = grep { !$seen{$_}++ } @_;
	  return sort @unique;
	}

	# Collect the token identifiers used by a RelaxNG schema file.
	#
	# Each line of the file is scanned for three constructs, and X is recorded:
	#    <element ... name="X"
	#    <attribute ... name="X"
	#    <value>X
	# Matching is done one line at a time, so a construct that is split across
	# lines is not recognised.
	#
	# Arguments: $fname - path of the schema file to read.
	# Returns:   the sorted, de-duplicated list of identifiers (via sortUnique).
	# Dies if the file cannot be opened.
	sub getTokens
	{
	  my $fname = shift(@_);
	  my @tokens;

	  # Three-argument open with a lexical handle: the file name is never
	  # interpreted as part of the open mode, and the handle is private to this sub.
	  open(my $schema, '<', $fname)
	    || die "Couldn't open $fname for reading: $!\n";

	  while ( defined(my $line = <$schema>) )
	  {
	    # In list context a /g match returns every captured X on the line. The
	    # "*" quantifiers let the patterns span other attributes before name=
	    # and capture names of any length.
	    push @tokens, $line =~ /<element[^>]*name="([^"]*)"/g;
	    push @tokens, $line =~ /<attribute[^>]*name="([^"]*)"/g;
	    push @tokens, $line =~ /<value>\s*([^<\s]*)/g;
	  }
	  close $schema;

	  # The patterns can capture an empty string (for example a line that ends
	  # right after "<value>"). An empty name cannot form a token identifier, so
	  # such captures are dropped.
	  return sortUnique(grep { $_ ne "" } @tokens);
	}

	# Extract the terminal tokens from the schema file
	my @tokens = getTokens($SchemaFile);

	# Work out the module name from the current directory: everything after the
	# last "/hlvm/" path component, with the remaining slashes turned into
	# underscores. This is done before gperf is started so that a failure here
	# does not leave a partially written hash file behind.
	$hlvmdir = get_hlvm_dir();  # not used below; kept in case mkFuncs reads it -- TODO confirm
	chomp($ModulePath = `pwd`);
	my $hlvm_pos = rindex($ModulePath,"/hlvm/");
	die "ERROR: current directory '$ModulePath' is not below an hlvm/ directory\n"
	  if $hlvm_pos < 0;
	$ModulePath = substr($ModulePath,$hlvm_pos+6);
	$Module = $ModulePath;
	$Module =~ s|\/|_|g;

	# Set up a gperf invocation to convert the token list into a perfect hash
	# function
	open(my $gperf,"| gperf -tcDCIoGl --fast 0 -L C++ -Z $TokenHashClass -s 2 -S 1 -k '*' > $TokenHashFile")
	  || die "ERROR: couldn't start gperf: $!\n";

	# Run the input through GPERF to create the perfect hash function
	print {$gperf} "struct TokenMap {\n";
	print {$gperf} "const char *name; HLVM_$Module"."::".$Schema."Tokens token;\n" ;
	print {$gperf} "};\n%%\n" ;
	print {$gperf} "\"$_\", HLVM_".$Module."::TKN_".$_.",\n" foreach @tokens;
	print {$gperf} "%%\n";

	# Closing a pipe waits for the child; a false result means either an I/O
	# error ($! set) or a non-zero exit status from the command (in $?).
	unless (close($gperf)) {
	  die "ERROR: couldn't close pipe to gperf: $!\n" if $!;
	  die "ERROR: gperf exited with status " . ($? >> 8) . "\n";
	}

	# Generate the header file for the tokenizer, starting it with the preamble for
	# C++ source files
	# $TOKEN_LIST and $SCHEMA_NAME are package variables; presumably process_file
	# (from mkFuncs) substitutes them into the templates -- verify against mkFuncs.
	$TOKEN_LIST = "TKN_" . join(",\n    TKN_",@tokens) . ",";
	$SCHEMA_NAME = $Schema;
	process_file($PreambleFile,$HeaderTemplate,$HeaderFile);
	process_file($PreambleFile,$SourceTemplate,$SourceFile);