Skip to content

Instantly share code, notes, and snippets.

@Konamiman
Last active November 28, 2017 12:01
Show Gist options
  • Save Konamiman/90c103c11590ab1f9dff2b4d08e973fa to your computer and use it in GitHub Desktop.
Save Konamiman/90c103c11590ab1f9dff2b4d08e973fa to your computer and use it in GitHub Desktop.
MSX-BASIC tokenized file parser written in Ruby
# MSX BASIC tokenized file parser written in Ruby
# By Konamiman, http://www.konamiman.com
# This is my first Ruby thing, so improvement suggestions are highly welcome
# Program entry point: validates the command line, loads the input bytes,
# prepares the optional output file and emits every decoded program line,
# either to that file or to standard output. Always finishes via terminate.
def main
  check_parameters
  @file_bytes_enumerator = read_file_bytes.each
  @output_file = create_output_file
  discard_text_start_code

  # get_and_parse_line returns nil once the end-of-program mark is reached
  while (listing_line = get_and_parse_line)
    if @output_file
      @output_file.puts(listing_line)
    else
      puts(listing_line)
    end
  end

  terminate
end
# Shows the usage text and ends the program when the script was started
# without any command line argument; does nothing otherwise.
def check_parameters
  return unless ARGV.empty?

  usage = <<~USAGE
    MSX-BASIC tokenized file parser
    By Konamiman, 11/2017
    Usage: #{File.basename(__FILE__)} <input file> [<output file>]
    Result is dumped to STDOUT if <output file> is omitted
    Reference: MSX2 Technical Handbook, Chapter 2, Section 3.3
    http://www.konamiman.com/msx/msx2th/th-2.txt
  USAGE
  puts usage
  terminate
end
# Ends the program, closing the output file first when one is open.
#
# message - optional error text. When given, the process is aborted with it
#           (Kernel#abort); otherwise the process exits with status 0.
def terminate(message = nil)
  open_output = @output_file
  open_output.close if open_output

  if message
    abort(message)
  end
  exit(0)
end
# Loads the input file named by the first command line argument.
#
# Returns the file contents as an Array of byte values. Ends the program
# through terminate when the file does not exist or cannot be read.
def read_file_bytes
  input_path = ARGV[0]
  terminate("*** File '#{input_path}' not found!") unless File.exist?(input_path)

  byte_values = nil
  begin
    # Binary mode so that no newline or encoding conversion is applied
    byte_values = File.open(input_path, 'rb') { |input| input.read }.bytes
  rescue StandardError => error
    terminate("*** Error when reading input file: #{error.message}")
  end
  byte_values
end
# Opens the output file named by the second command line argument.
#
# Returns nil when no second argument was given, otherwise the File opened
# for writing. Ends the program through terminate when it cannot be created.
def create_output_file
  return nil if ARGV.length < 2

  File.new(ARGV[1], 'w')
rescue StandardError => error
  terminate("*** Error when creating output file: #{error.message}")
end
# Consumes and returns the next byte of the input file. Running out of
# bytes means the file is truncated, so the program is ended with an error.
def get_byte
  begin
    @file_bytes_enumerator.next
  rescue StopIteration
    truncated_message = '*** End of file reached before finding the end of program mark'
    terminate(truncated_message)
  end
end
# Returns the next byte of the input file without consuming it.
#
# Enumerator#peek raises StopIteration when no bytes are left; as in
# get_byte, that means the file is truncated, so the program is ended with
# an error message instead of letting the exception escape.
def peek_byte
  @file_bytes_enumerator.peek
rescue StopIteration
  terminate('*** End of file reached before finding the end of program mark')
end
# Consumes the first byte of the file, which must be 255; any other value
# ends the program with an error.
def discard_text_start_code
  first_byte = get_byte
  return if first_byte == 255

  terminate("*** Expected an initial 255 byte, got #{first_byte} instead")
end
# Reads one tokenized program line from the byte stream and converts it to
# its textual form.
#
# As read here, a line consists of a two-byte next-line pointer, a two-byte
# line number, the tokenized statement bytes and a terminating zero byte.
#
# Returns the decoded line ("<line number> <text>") as a binary String, or
# nil when the next-line pointer is zero (end of program mark).
# Ends the program through terminate on malformed content.
def get_and_parse_line
  next_line_pointer = extract_two_byte_integer
  return nil if next_line_pointer.zero?

  line_number = extract_two_byte_integer
  # Explicitly binary: literal bytes above 127 are appended unchanged below
  line_text = "#{line_number} ".b
  inside_string = false

  until (the_byte = get_byte).zero?
    if inside_string
      # Bytes inside a string literal are characters, never tokens
      inside_string = false if the_byte == 0x22
      line_text << the_byte
      next
    end

    case the_byte
    when 0x22 # double quote: start of string literal
      inside_string = true
      line_text << the_byte
    when 0xE6 # comes after '
      # Nothing to emit: the "'" itself is written when 3A 8F is decoded
    when 0x0B # octal number
      line_text << '&O' + extract_two_byte_integer.to_s(8)
    when 0x0C # hex number
      line_text << '&H' + format('%X', extract_two_byte_integer)
    when 0x0D # line number after RUN
      terminate('*** ERROR: Got an absolute line number address. This should happen only at runtime, not inside a BASIC file!')
    when 0x0E, 0x1C # line number before RUN, or integer between 256 and 32767
      line_text << extract_two_byte_integer.to_s
    when 0x0F # number from 10 to 255
      line_text << get_byte.to_s
    when 0x11..0x1A # integer constant from 0 to 9
      line_text << (the_byte - 0x11).to_s
    when 0x1D # single precision real
      line_text << parse_real_number((1..4).collect { get_byte })
    when 0x1F # double precision real
      line_text << parse_real_number((1..8).collect { get_byte })
    when 0x3A # colon or token prefixed with 3A
      next_byte = peek_byte
      if @tokens_prefixed_with_3A.key?(next_byte)
        line_text << @tokens_prefixed_with_3A[next_byte]
        get_byte
      else
        line_text << the_byte
      end
    when 0xFF # token prefixed with FF
      token_code = get_byte
      token_text = @tokens_prefixed_with_FF[token_code]
      # An unknown code would otherwise append nil and raise a TypeError
      terminate(format('*** Unknown token FF %02X in line %d', token_code, line_number)) unless token_text
      line_text << token_text
    else
      # Known single byte token, or a literal character copied as-is
      line_text << @single_byte_tokens.fetch(the_byte, the_byte)
    end
  end

  line_text
end
# Consumes two bytes and combines them into an integer, taking the first
# byte read as the low-order one.
def extract_two_byte_integer
  low_byte = get_byte
  high_byte = get_byte
  low_byte + 256 * high_byte
end
# Splits a byte into its two 4-bit halves.
#
# Returns a two-element Array: [high nibble, low nibble].
def parse_bcd(byte)
  high_nibble, low_nibble = byte.divmod(16)
  [high_nibble, low_nibble]
end
# Converts the stored bytes of a real number constant into decimal text.
#
# number_bytes - Array of byte values: the first one is the exponent with an
#                offset of 64, the remaining ones hold two BCD mantissa
#                digits each. The decimal point sits "exponent" digits into
#                the mantissa.
#
# Returns the number as a String without superfluous trailing zeros and
# without a dangling decimal point ("100000", not "100000.").
def parse_real_number(number_bytes)
  # TODO: use exponential notation for very big or very small exponents
  exponent = number_bytes[0] - 64
  return '0' if exponent == -64 # a zero first byte means the value is zero

  digits = number_bytes.drop(1).flat_map { |mantissa_byte| parse_bcd(mantissa_byte) }.map(&:to_s)

  if exponent <= 0
    # Pure fraction: leading zeros come from the negative exponent
    parsed = '0.' + ('0' * exponent.abs) + digits.join
    parsed.sub(/0+\z/, '')
  elsif exponent > digits.length
    # Integer with more digits than the mantissa holds: pad with zeros
    digits.join + ('0' * (exponent - digits.length))
  else
    parsed = digits.first(exponent).join + '.' + digits.drop(exponent).join
    # Drop trailing fraction zeros and the point itself when nothing follows
    # it, including when the integer part uses every mantissa digit
    parsed.sub(/\.?0*\z/, '')
  end
end
# Keywords and operators stored as a single token byte, ordered by token code.
@single_byte_tokens = {
  0x81 => 'END',
  0x82 => 'FOR',
  0x83 => 'NEXT',
  0x84 => 'DATA',
  0x85 => 'INPUT',
  0x86 => 'DIM',
  0x87 => 'READ',
  0x88 => 'LET',
  0x89 => 'GOTO',
  0x8A => 'RUN',
  0x8B => 'IF',
  0x8C => 'RESTORE',
  0x8D => 'GOSUB',
  0x8E => 'RETURN',
  # Careful! MSX2 Technical Handbook says that REM has bytes 3A 8F,
  # but actually REM has just 8F and ' has 3A 8F E6
  0x8F => 'REM',
  0x90 => 'STOP',
  0x91 => 'PRINT',
  0x92 => 'CLEAR',
  0x93 => 'LIST',
  0x94 => 'NEW',
  0x95 => 'ON',
  0x96 => 'WAIT',
  0x97 => 'DEF',
  0x98 => 'POKE',
  0x99 => 'CONT',
  0x9A => 'CSAVE',
  0x9B => 'CLOAD',
  0x9C => 'OUT',
  0x9D => 'LPRINT',
  0x9E => 'LLIST',
  0x9F => 'CLS',
  0xA0 => 'WIDTH',
  0xA2 => 'TRON',
  0xA3 => 'TROFF',
  0xA4 => 'SWAP',
  0xA5 => 'ERASE',
  0xA6 => 'ERROR',
  0xA7 => 'RESUME',
  0xA8 => 'DELETE',
  0xA9 => 'AUTO',
  0xAA => 'RENUM',
  0xAB => 'DEFSTR',
  0xAC => 'DEFINT',
  0xAD => 'DEFSNG',
  0xAE => 'DEFDBL',
  0xAF => 'LINE',
  0xB0 => 'OPEN',
  0xB1 => 'FIELD',
  0xB2 => 'GET',
  0xB3 => 'PUT',
  0xB4 => 'CLOSE',
  0xB5 => 'LOAD',
  0xB6 => 'MERGE',
  0xB7 => 'FILES',
  0xB8 => 'LSET',
  0xB9 => 'RSET',
  0xBA => 'SAVE',
  0xBB => 'LFILES',
  0xBC => 'CIRCLE',
  0xBD => 'COLOR',
  0xBE => 'DRAW',
  0xBF => 'PAINT',
  0xC0 => 'BEEP',
  0xC1 => 'PLAY',
  0xC2 => 'PSET',
  0xC3 => 'PRESET',
  0xC4 => 'SOUND',
  0xC5 => 'SCREEN',
  0xC6 => 'VPOKE',
  0xC7 => 'SPRITE',
  0xC8 => 'VDP',
  0xC9 => 'BASE',
  0xCA => 'CALL',
  0xCB => 'TIME',
  0xCC => 'KEY',
  0xCD => 'MAX',
  0xCE => 'MOTOR',
  0xCF => 'BLOAD',
  0xD0 => 'BSAVE',
  0xD1 => 'DSKO$',
  0xD2 => 'SET',
  0xD3 => 'NAME',
  0xD4 => 'KILL',
  0xD5 => 'IPL',
  0xD6 => 'COPY',
  0xD7 => 'CMD',
  0xD8 => 'LOCATE',
  0xD9 => 'TO',
  0xDA => 'THEN',
  0xDB => 'TAB(',
  0xDC => 'STEP',
  0xDD => 'USR',
  0xDE => 'FN',
  0xDF => 'SPC(',
  0xE0 => 'NOT',
  0xE1 => 'ERL',
  0xE2 => 'ERR',
  0xE3 => 'STRING$',
  0xE4 => 'USING',
  0xE5 => 'INSTR',
  0xE7 => 'VARPTR',
  0xE8 => 'CSRLIN',
  0xE9 => 'ATTR$',
  0xEA => 'DSKI$',
  0xEB => 'OFF',
  0xEC => 'INKEY$',
  0xED => 'POINT',
  0xEE => '>',
  0xEF => '=',
  0xF0 => '<',
  0xF1 => '+',
  0xF2 => '-',
  0xF3 => '*',
  0xF4 => '/',
  0xF5 => '^',
  0xF6 => 'AND',
  0xF7 => 'OR',
  0xF8 => 'XOR',
  0xF9 => 'EQV',
  0xFA => 'IMP',
  0xFB => 'MOD',
  0xFC => '\\'
}
# Tokens looked up by the byte that follows a 3A byte.
@tokens_prefixed_with_3A = { 0xA1 => 'ELSE', 0x8F => "'" }
# Function names looked up by the byte that follows an FF byte, ordered by
# token code.
@tokens_prefixed_with_FF = {
  0x81 => 'LEFT$',
  0x82 => 'RIGHT$',
  0x83 => 'MID$',
  0x84 => 'SGN',
  0x85 => 'INT',
  0x86 => 'ABS',
  0x87 => 'SQR',
  0x88 => 'RND',
  0x89 => 'SIN',
  0x8A => 'LOG',
  0x8B => 'EXP',
  0x8C => 'COS',
  0x8D => 'TAN',
  0x8E => 'ATN',
  0x8F => 'FRE',
  0x90 => 'INP',
  0x91 => 'POS',
  0x92 => 'LEN',
  0x93 => 'STR$',
  0x94 => 'VAL',
  0x95 => 'ASC',
  0x96 => 'CHR$',
  0x97 => 'PEEK',
  0x98 => 'VPEEK',
  0x99 => 'SPACE$',
  0x9A => 'OCT$',
  0x9B => 'HEX$',
  0x9C => 'LPOS',
  0x9D => 'BIN$',
  0x9E => 'CINT',
  0x9F => 'CSNG',
  0xA0 => 'CDBL',
  0xA1 => 'FIX',
  0xA2 => 'STICK',
  0xA3 => 'STRIG',
  0xA4 => 'PDL',
  0xA5 => 'PAD',
  0xA6 => 'DSKF',
  0xA7 => 'FPOS',
  0xA8 => 'CVI',
  0xA9 => 'CVS',
  0xAA => 'CVD',
  0xAB => 'EOF',
  0xAC => 'LOC',
  0xAD => 'LOF',
  0xAE => 'MKI$',
  0xAF => 'MKS$',
  0xB0 => 'MKD$'
}
# Run the parser only when this file is executed as a script, so that loading
# it from other code (e.g. with require) does not start parsing ARGV.
main if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment