fabioyamate/gist:242115

## gistfile1.rb
require 'grammar_wirth'
require 'pp'

# A grammar identifier: one ASCII letter followed by any run of letters or underscores.
NONTERMINAL = %r{[a-zA-Z][a-zA-Z_]*}
# A quoted terminal; greedy, so it spans from the first to the last double quote on a line.
TERMINAL = %r{".+"}
# A whole rule: everything on one line up to and including its last period.
RULES = %r{.*\.}
# Splits a rule into its optional name (capture 1, the identifier before "=")
# and its declaration (capture 2, which runs through the final period).
RULE = %r{(?:([a-zA-Z][a-zA-Z_]*)\s*=)?\s*(.+\.)\n?}

# Maps the literal spelling of each punctuation/operator terminal to the
# token-class name that token_codegen writes into the generated C condition.
TOKENS = {
  # brackets
  "(" => "LPAR", ")" => "RPAR",
  "{" => "LCUR", "}" => "RCUR",
  "[" => "LBRA", "]" => "RBRA",
  # assignment operators
  "=" => "ASSIGN",
  "+=" => "ADD_ASSIGN", "-=" => "SUB_ASSIGN",
  "*=" => "MULT_ASSIGN", "/=" => "DIV_ASSIGN",
  "**=" => "PWR_ASSIGN", "%=" => "MOD_ASSIGN",
  # comparison operators
  "<" => "LT", ">" => "GT",
  "<=" => "LE", ">=" => "GE",
  "!=" => "NE", "==" => "EQ",
  # arithmetic operators
  "*" => "MULT", "+" => "ADD", "-" => "SUB",
  "/" => "DIV", "%" => "MOD", "**" => "PWR",
  # range operator
  ".." => "RANGE"
}

# Skeleton of the C function generated for each grammar rule. `%nonterminal%`
# is replaced with the rule name and `%transitions%` with the `case` bodies.
# The state variable is declared before the loop so that it keeps its value
# from one iteration to the next; declaring it inside the loop would reset the
# machine to state 0 on every pass.
TEMPLATE = <<-CODE
void %nonterminal%()
{
  int current_state = 0;
  while (true) {
    switch (current_state) {
%transitions%
      default:
        fatal_error("unexpected state");
        break;
    }
  }
}
CODE

# Generates the C statements for a transition on a non-terminal: a call to the
# sub-machine function named `input`, followed by the move to state `to`.
# Returns the two statements as one String, each line indented by eight
# spaces and terminated by a newline.
def submachine_codegen(input, to)
  indent = ' ' * 8
  statements = [
    "#{input}(); /* CALL SUB MACHINE */",
    "current_state = #{to};"
  ]
  statements.map { |statement| "#{indent}#{statement}\n" }.join
end

# Generates the C `if` that guards a transition on a terminal.
#
# input - the terminal text; "ANY_CHAR" produces a printable-ASCII range test
#         on `ch`, anything else a comparison against `token->class`
# to    - the state assigned to `current_state` when the condition holds
#
# Operator spellings are translated through TOKENS; a bare double or single
# quote is wrapped as a C string literal. Returns the generated C as a String.
def token_codegen(input, to)
  condition =
    if input == "ANY_CHAR"
      "' ' <= ch && ch <= '~'"
    else
      symbol = TOKENS.fetch(input, input)
      symbol = { '"' => '"\""', "'" => %q["'"] }.fetch(symbol, symbol)
      "token->class == #{symbol}"
    end
  "        if (#{condition})\n          current_state = #{to};\n"
end

# Wirth-notation grammar for the lexical elements: one rule per line, each
# ending in a period. The quoted terminal "any_char" is upcased by the
# generator below and handled specially by token_codegen.
# NOTE(review): "*=" has a TOKENS entry (MULT_ASSIGN) but does not appear in
# op_assign, and `letter` is listed twice in identifier -- confirm both.
lexer = <<-LEXER
string = """ { "any_char" } """ | "'" { "any_char" } "'".
op_assign = "=" | "+=" | "/=" | "-=" | "%=" | "**=" .
op_expr = "<" | ">" | ">=" | "<=" | "!=" | "==" | "+" | "-" | "/" | "*" | "%" | "**" | "..".
integer = digit { digit }.
float = digit { digit } "." digit { digit }.
operation = identifier [ "!" | "?" ].
identifier = ( "_" | letter | letter ) { "_" | letter | digit }.
number = digit { digit }.
LEXER

# Builds the C `case` block for one DFA state.
#
# state       - the state label interpolated into `case <state>:`
# transitions - enumerable of [input, target_state] pairs leaving the state
# accepting   - when truthy, a `return;` is emitted ahead of the `break;`
#
# Returns the generated C source as a String.
def state_codegen(state, transitions, accepting)
  moves = transitions.map do |input, to|
    # Quoted inputs are terminals; anything else is a call to another sub-machine.
    terminal = /"(.+)"/.match(input)
    if terminal
      token_codegen(terminal.captures.first.upcase, to)
    else
      submachine_codegen(input, to)
    end
  end

  code = "      case #{state}:\n"
  # A character is read once per state, and only if the state has transitions.
  code << "        ch = readc(in);\n" unless moves.empty?
  # Alternatives are chained with `else`; the caller collapses the whitespace
  # between `else` and the following `if`.
  code << moves.join('        else ')
  code << "        else\n          fatal_error(\"Syntax Error\");\n" if moves.size > 1
  code << "        return; /* ACCEPT */\n" if accepting
  code << "        break;\n"
end

# Generate one C function per grammar rule into lexer.c. The block form of
# File.open closes the file even when code generation raises.
File.open("lexer.c", "w") do |file|
  lexer.scan(RULES).each do |rule|
    parsed = RULE.match(rule)
    name, decl = parsed && parsed.captures
    raise ArgumentError, "malformed grammar rule: #{rule.inspect}" unless name && decl

    dfa = Grammar::Wirth.new(decl).minimized_dfa

    # The case list is rebuilt for every rule; sharing one buffer across rules
    # would copy the states of all earlier rules into each later function.
    cases = dfa[:states].map do |state|
      state_codegen(state, dfa[:transitions][state], dfa[:final].include?(state))
    end
    output = cases.join

    # Block-form sub inserts the text verbatim, so backslashes in the generated
    # code are never interpreted as replacement escapes.
    function = TEMPLATE.sub('%nonterminal%') { name }.sub('%transitions%') { output }
    file << function.gsub(/else\s+if/, "else if")
  end
end
	require 'grammar_wirth'
	require 'pp'

	# A grammar identifier: one ASCII letter followed by any run of letters or underscores.
	NONTERMINAL = /[a-zA-Z][a-zA-Z_]*/
	# A quoted terminal; greedy, so it spans from the first to the last double quote on a line.
	TERMINAL = /".+"/
	# A whole rule: everything on one line up to and including its last period.
	RULES = /.*\./
	# Splits a rule into its optional name (capture 1, the identifier before "=")
	# and its declaration (capture 2, which runs through the final period).
	# The `*` quantifiers on the identifier tail and on the whitespace before "="
	# are required: without them only two-character names followed by exactly
	# one whitespace character are captured.
	RULE = /(?:([a-zA-Z][a-zA-Z_]*)\s*=)?\s*(.+\.)\n?/

	# Maps the literal spelling of each punctuation/operator terminal to the
	# token-class name that token_codegen writes into the generated C condition.
	TOKENS = {
	  # brackets
	  "(" => "LPAR", ")" => "RPAR",
	  "{" => "LCUR", "}" => "RCUR",
	  "[" => "LBRA", "]" => "RBRA",
	  # assignment operators
	  "=" => "ASSIGN",
	  "+=" => "ADD_ASSIGN", "-=" => "SUB_ASSIGN",
	  "*=" => "MULT_ASSIGN", "/=" => "DIV_ASSIGN",
	  "**=" => "PWR_ASSIGN", "%=" => "MOD_ASSIGN",
	  # comparison operators
	  "<" => "LT", ">" => "GT",
	  "<=" => "LE", ">=" => "GE",
	  "!=" => "NE", "==" => "EQ",
	  # arithmetic operators
	  "*" => "MULT", "+" => "ADD", "-" => "SUB",
	  "/" => "DIV", "%" => "MOD", "**" => "PWR",
	  # range operator
	  ".." => "RANGE"
	}

	# Skeleton of the C function generated for each grammar rule. `%nonterminal%`
	# is replaced with the rule name and `%transitions%` with the `case` bodies.
	# The heredoc body is written without the paste's leading tabs so the emitted
	# C is properly nested, and the state variable is declared before the loop so
	# that it keeps its value from one iteration to the next.
	TEMPLATE = <<-CODE
void %nonterminal%()
{
  int current_state = 0;
  while (true) {
    switch (current_state) {
%transitions%
      default:
        fatal_error("unexpected state");
        break;
    }
  }
}
	CODE

	# Generates the C statements for a transition on a non-terminal: a call to the
	# sub-machine function named `input`, followed by the move to state `to`.
	# Each line is indented by eight spaces (the depth of a `case` body in
	# TEMPLATE) rather than the single tab left behind by the flattened paste.
	# Returns the two statements as one String.
	def submachine_codegen(input, to)
	  "        #{input}(); /* CALL SUB MACHINE */\n" \
	  "        current_state = #{to};\n"
	end

	# Generates the C `if` that guards a transition on a terminal.
	#
	# input - the terminal text; "ANY_CHAR" produces a printable-ASCII range test
	#         on `ch`, anything else a comparison against `token->class`
	# to    - the state assigned to `current_state` when the condition holds
	#
	# Operator spellings are translated through TOKENS; a bare double or single
	# quote is wrapped as a C string literal. The emitted lines use the 8/10-space
	# indentation of a `case` body instead of the single tab left by the
	# flattened paste. Returns the generated C as a String.
	def token_codegen(input, to)
	  condition =
	    if input.eql?("ANY_CHAR")
	      "' ' <= ch && ch <= '~'"
	    else
	      symbol = TOKENS.fetch(input, input)
	      symbol = '"\""' if symbol.eql?('"')
	      symbol = %q["'"] if symbol.eql?("'")
	      "token->class == #{symbol}"
	    end
	  "        if (#{condition})\n          current_state = #{to};\n"
	end

	# Wirth-notation grammar for the lexical elements: one rule per line, each
	# ending in a period. Rules start in column 0 because RULE only captures the
	# rule name when the line begins with it, and alternation is a bare `|`.
	# The quoted terminal "any_char" is upcased by the generator below and
	# handled specially by token_codegen.
	# NOTE(review): "*=" has a TOKENS entry (MULT_ASSIGN) but does not appear in
	# op_assign, and `letter` is listed twice in identifier -- confirm both.
	lexer = <<-LEXER
string = """ { "any_char" } """ | "'" { "any_char" } "'".
op_assign = "=" | "+=" | "/=" | "-=" | "%=" | "**=" .
op_expr = "<" | ">" | ">=" | "<=" | "!=" | "==" | "+" | "-" | "/" | "*" | "%" | "**" | "..".
integer = digit { digit }.
float = digit { digit } "." digit { digit }.
operation = identifier [ "!" | "?" ].
identifier = ( "_" | letter | letter ) { "_" | letter | digit }.
number = digit { digit }.
	LEXER

	# Builds the C `case` block for one DFA state.
	#
	# state       - the state label interpolated into `case <state>:`
	# transitions - enumerable of [input, target_state] pairs leaving the state
	# accepting   - when truthy, a `return;` is emitted ahead of the `break;`
	#
	# Returns the generated C source as a String.
	def state_codegen(state, transitions, accepting)
	  moves = transitions.map do |input, to|
	    # Quoted inputs are terminals; anything else is a call to another sub-machine.
	    terminal = /"(.+)"/.match(input)
	    if terminal
	      token_codegen(terminal.captures.first.upcase, to)
	    else
	      submachine_codegen(input, to)
	    end
	  end

	  code = "      case #{state}:\n"
	  # A character is read once per state, and only if the state has transitions.
	  code << "        ch = readc(in);\n" unless moves.empty?
	  # Alternatives are chained with `else`; the caller collapses the whitespace
	  # between `else` and the following `if`.
	  code << moves.join('        else ')
	  code << "        else\n          fatal_error(\"Syntax Error\");\n" if moves.size > 1
	  code << "        return; /* ACCEPT */\n" if accepting
	  code << "        break;\n"
	end

	# Generate one C function per grammar rule into lexer.c. The block form of
	# File.open closes the file even when code generation raises.
	File.open("lexer.c", "w") do |file|
	  lexer.scan(RULES).each do |rule|
	    parsed = RULE.match(rule)
	    name, decl = parsed && parsed.captures
	    raise ArgumentError, "malformed grammar rule: #{rule.inspect}" unless name && decl

	    dfa = Grammar::Wirth.new(decl).minimized_dfa

	    # The case list is rebuilt for every rule; sharing one buffer across rules
	    # would copy the states of all earlier rules into each later function.
	    cases = dfa[:states].map do |state|
	      state_codegen(state, dfa[:transitions][state], dfa[:final].include?(state))
	    end
	    output = cases.join

	    # Block-form sub inserts the text verbatim, so backslashes in the generated
	    # code are never interpreted as replacement escapes.
	    function = TEMPLATE.sub('%nonterminal%') { name }.sub('%transitions%') { output }
	    file << function.gsub(/else\s+if/, "else if")
	  end
	end