Last active
November 28, 2017 12:01
-
-
Save Konamiman/90c103c11590ab1f9dff2b4d08e973fa to your computer and use it in GitHub Desktop.
MSX-BASIC tokenized file parser written in Ruby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MSX BASIC tokenized file parser written in Ruby
# By Konamiman, http://www.konamiman.com
# This is my first Ruby thing, so improvement suggestions are highly welcome
# Program entry point: validates the command line, loads the input file,
# then prints every decoded program line to the output file (when one was
# requested) or to standard output, and finally exits through `terminate`.
def main
  check_parameters
  @file_bytes_enumerator = read_file_bytes.each
  @output_file = create_output_file
  discard_text_start_code

  # `while` rather than `loop do`: `loop` would silently swallow a
  # StopIteration escaping from the byte enumerator.
  destination = @output_file || $stdout
  while (decoded_line = get_and_parse_line)
    destination.puts(decoded_line)
  end

  terminate
end
# Shows the usage text and exits when the script is started without any
# command line argument; does nothing otherwise.
def check_parameters
  return unless ARGV.empty?

  usage_text = <<~USAGE
    MSX-BASIC tokenized file parser
    By Konamiman, 11/2017
    Usage: #{File.basename(__FILE__)} <input file> [<output file>]
    Result is dumped to STDOUT if <output file> is omitted
    Reference: MSX2 Technical Handbook, Chapter 2, Section 3.3
    http://www.konamiman.com/msx/msx2th/th-2.txt
  USAGE
  puts usage_text
  terminate
end
# Ends the program, closing the output file first if one is open.
# With a message, the message is reported through `abort`;
# without one the process exits with status 0.
def terminate(message = nil)
  @output_file&.close
  message ? abort(message) : exit(0)
end
# Returns the contents of the input file (first command line argument) as
# an array of byte values. Terminates the program with an error message if
# the file does not exist or cannot be read.
def read_file_bytes
  input_path = ARGV[0]
  terminate("*** File '#{input_path}' not found!") unless File.exist?(input_path)

  begin
    File.open(input_path, 'rb') { |input| input.read.bytes }
  rescue StandardError => error
    terminate("*** Error when reading input file: #{error.message}")
  end
end
# Opens the output file named by the second command line argument for
# writing and returns it, or returns nil when no output file was given.
# Terminates the program with an error message if the file cannot be created.
def create_output_file
  return if ARGV.length < 2

  File.new(ARGV[1], 'w')
rescue StandardError => error
  terminate("*** Error when creating output file: #{error.message}")
end
# Consumes and returns the next byte of the input file. Running out of
# bytes means the file is truncated, so the program is terminated with an
# error message.
def get_byte
  byte_source = @file_bytes_enumerator
  byte_source.next
rescue StopIteration
  terminate('*** End of file reached before finding the end of program mark')
end
# Returns the next byte of the input file without consuming it.
# Like get_byte, running out of bytes terminates the program with an error
# message instead of letting StopIteration escape (which happened when a
# truncated file ended right after a 0x3A byte).
def peek_byte
  @file_bytes_enumerator.peek
rescue StopIteration
  terminate('*** End of file reached before finding the end of program mark')
end
# Consumes the first byte of the file, which must be 255 (0xFF); any other
# value terminates the program with an error message.
def discard_text_start_code
  first_byte = get_byte
  return if first_byte == 255

  terminate("*** Expected an initial 255 byte, got #{first_byte} instead")
end
# Reads one tokenized program line from the byte stream and returns its
# listing text ("<line number> <statement text>"), or nil when the end of
# program mark (a zero "next line" pointer) is found.
#
# The text is accumulated in a binary string so that bytes above 0x7F which
# are not tokens (string literal and comment contents) are emitted unchanged.
def get_and_parse_line
  next_line_pointer = extract_two_byte_integer
  return nil if next_line_pointer.zero?

  line_number = extract_two_byte_integer
  line_text = String.new("#{line_number} ", encoding: Encoding::BINARY)
  inside_string = false
  inside_comment = false

  # A zero byte marks the end of the line.
  until (the_byte = get_byte).zero?
    # Comment text (after REM or ') runs to the end of the line and is
    # stored verbatim, so it must not be looked up in the token tables.
    if inside_comment
      line_text << the_byte
      next
    end

    # Same for string literal contents, up to the closing double quote.
    if inside_string
      inside_string = false if the_byte == 0x22
      line_text << the_byte
      next
    end

    case the_byte
    when 0x22 # double quote: start of string literal
      inside_string = true
      line_text << the_byte
    when 0xE6 # third byte of the ' sequence; a stray one produces no text
    when 0x0B # octal number
      line_text << '&O' << extract_two_byte_integer.to_s(8)
    when 0x0C # hex number
      line_text << '&H' << format('%X', extract_two_byte_integer)
    when 0x0D # line number after RUN
      terminate('*** ERROR: Got an absolute line number address. This should happen only at runtime, not inside a BASIC file!')
    when 0x0E, 0x1C # line number before RUN, or integer between 256 and 32767
      line_text << extract_two_byte_integer.to_s
    when 0x0F # number from 10 to 255
      line_text << get_byte.to_s
    when 0x11..0x1A # number from 0 to 9
      line_text << (the_byte - 0x11).to_s
    when 0x1D # single precision real
      line_text << parse_real_number((1..4).collect { get_byte })
    when 0x1F # double precision real
      line_text << parse_real_number((1..8).collect { get_byte })
    when 0x3A # colon or token prefixed with 3A
      next_byte = peek_byte
      if next_byte == 0x8F
        get_byte
        if peek_byte == 0xE6
          # 3A 8F E6 is the ' comment marker
          get_byte
          line_text << @tokens_prefixed_with_3A[0x8F]
        else
          # 3A 8F without E6 is a real colon followed by REM
          line_text << the_byte << @single_byte_tokens[0x8F]
        end
        inside_comment = true
      elsif @tokens_prefixed_with_3A.key?(next_byte)
        line_text << @tokens_prefixed_with_3A[next_byte]
        get_byte
      else
        line_text << the_byte
      end
    when 0x8F # REM: the rest of the line is comment text
      line_text << @single_byte_tokens[0x8F]
      inside_comment = true
    when 0xFF # token prefixed with FF
      token_code = get_byte
      token_text = @tokens_prefixed_with_FF[token_code]
      terminate(format('*** Unknown token: FF %02X', token_code)) if token_text.nil?
      line_text << token_text
    else
      # Either a single byte token or a plain character
      line_text << (@single_byte_tokens.key?(the_byte) ? @single_byte_tokens[the_byte] : the_byte)
    end
  end

  line_text
end
# Consumes two bytes and combines them into a 16 bit unsigned integer,
# low byte first.
def extract_two_byte_integer
  low_byte = get_byte
  high_byte = get_byte
  low_byte + (high_byte * 256)
end
# Splits a byte into its two 4 bit halves and returns them as
# [high nibble, low nibble].
def parse_bcd(byte)
  high_digit, low_digit = byte.divmod(16)
  [high_digit, low_digit]
end
# Converts a BCD encoded real number into its decimal text representation.
#
# number_bytes: array of byte values. The first byte holds the sign in
# bit 7 and the exponent (biased by 64) in bits 0-6; each remaining byte
# holds two BCD mantissa digits. The decimal point sits `exponent` digits
# after the start of the mantissa.
#
# Returns a String such as '0.5', '123456' or '-1.5'. A zero first byte
# yields '0'.
def parse_real_number(number_bytes)
  # TODO: use exponential notation for very big or very small exponents
  first_byte = number_bytes[0]
  return '0' if first_byte.zero?

  sign = (first_byte & 0x80).zero? ? '' : '-'
  exponent = (first_byte & 0x7F) - 64
  digits = number_bytes.drop(1).flat_map { |mantissa_byte| parse_bcd(mantissa_byte) }.join

  if exponent <= 0
    # The point is at or before the first mantissa digit: 0.000ddd
    integer_part = '0'
    fraction_part = ('0' * -exponent) + digits
  elsif exponent >= digits.length
    # The point is at or after the last mantissa digit: ddd000
    integer_part = digits + ('0' * (exponent - digits.length))
    fraction_part = ''
  else
    integer_part = digits[0, exponent]
    fraction_part = digits[exponent..-1]
  end

  # Drop meaningless trailing zeros, and the point itself when nothing
  # remains after it.
  fraction_part = fraction_part.sub(/0+\z/, '')
  unsigned_text = fraction_part.empty? ? integer_part : "#{integer_part}.#{fraction_part}"
  sign + unsigned_text
end
# Keywords and operators stored as a single token byte, listed in token
# byte order. 0xA1 (ELSE) and 0xE6 (') are absent on purpose: they only
# appear as part of sequences starting with 3A.
@single_byte_tokens = {
  # 0x81 - 0x8F
  0x81 => 'END', 0x82 => 'FOR', 0x83 => 'NEXT', 0x84 => 'DATA',
  0x85 => 'INPUT', 0x86 => 'DIM', 0x87 => 'READ', 0x88 => 'LET',
  0x89 => 'GOTO', 0x8A => 'RUN', 0x8B => 'IF', 0x8C => 'RESTORE',
  0x8D => 'GOSUB', 0x8E => 'RETURN',
  # Careful! MSX2 Technical Handbook says that REM has bytes 3A 8F,
  # but actually REM has just 8F and ' has 3A 8F E6
  0x8F => 'REM',
  # 0x90 - 0x9F
  0x90 => 'STOP', 0x91 => 'PRINT', 0x92 => 'CLEAR', 0x93 => 'LIST',
  0x94 => 'NEW', 0x95 => 'ON', 0x96 => 'WAIT', 0x97 => 'DEF',
  0x98 => 'POKE', 0x99 => 'CONT', 0x9A => 'CSAVE', 0x9B => 'CLOAD',
  0x9C => 'OUT', 0x9D => 'LPRINT', 0x9E => 'LLIST', 0x9F => 'CLS',
  # 0xA0 - 0xAF
  0xA0 => 'WIDTH', 0xA2 => 'TRON', 0xA3 => 'TROFF', 0xA4 => 'SWAP',
  0xA5 => 'ERASE', 0xA6 => 'ERROR', 0xA7 => 'RESUME', 0xA8 => 'DELETE',
  0xA9 => 'AUTO', 0xAA => 'RENUM', 0xAB => 'DEFSTR', 0xAC => 'DEFINT',
  0xAD => 'DEFSNG', 0xAE => 'DEFDBL', 0xAF => 'LINE',
  # 0xB0 - 0xBF
  0xB0 => 'OPEN', 0xB1 => 'FIELD', 0xB2 => 'GET', 0xB3 => 'PUT',
  0xB4 => 'CLOSE', 0xB5 => 'LOAD', 0xB6 => 'MERGE', 0xB7 => 'FILES',
  0xB8 => 'LSET', 0xB9 => 'RSET', 0xBA => 'SAVE', 0xBB => 'LFILES',
  0xBC => 'CIRCLE', 0xBD => 'COLOR', 0xBE => 'DRAW', 0xBF => 'PAINT',
  # 0xC0 - 0xCF
  0xC0 => 'BEEP', 0xC1 => 'PLAY', 0xC2 => 'PSET', 0xC3 => 'PRESET',
  0xC4 => 'SOUND', 0xC5 => 'SCREEN', 0xC6 => 'VPOKE', 0xC7 => 'SPRITE',
  0xC8 => 'VDP', 0xC9 => 'BASE', 0xCA => 'CALL', 0xCB => 'TIME',
  0xCC => 'KEY', 0xCD => 'MAX', 0xCE => 'MOTOR', 0xCF => 'BLOAD',
  # 0xD0 - 0xDF
  0xD0 => 'BSAVE', 0xD1 => 'DSKO$', 0xD2 => 'SET', 0xD3 => 'NAME',
  0xD4 => 'KILL', 0xD5 => 'IPL', 0xD6 => 'COPY', 0xD7 => 'CMD',
  0xD8 => 'LOCATE', 0xD9 => 'TO', 0xDA => 'THEN', 0xDB => 'TAB(',
  0xDC => 'STEP', 0xDD => 'USR', 0xDE => 'FN', 0xDF => 'SPC(',
  # 0xE0 - 0xEF
  0xE0 => 'NOT', 0xE1 => 'ERL', 0xE2 => 'ERR', 0xE3 => 'STRING$',
  0xE4 => 'USING', 0xE5 => 'INSTR', 0xE7 => 'VARPTR', 0xE8 => 'CSRLIN',
  0xE9 => 'ATTR$', 0xEA => 'DSKI$', 0xEB => 'OFF', 0xEC => 'INKEY$',
  0xED => 'POINT', 0xEE => '>', 0xEF => '=',
  # 0xF0 - 0xFC
  0xF0 => '<', 0xF1 => '+', 0xF2 => '-', 0xF3 => '*',
  0xF4 => '/', 0xF5 => '^', 0xF6 => 'AND', 0xF7 => 'OR',
  0xF8 => 'XOR', 0xF9 => 'EQV', 0xFA => 'IMP', 0xFB => 'MOD',
  0xFC => '\\'
}
# Tokens that are looked up by the byte following a 3A byte.
@tokens_prefixed_with_3A = {
  0xA1 => 'ELSE',
  0x8F => "'"
}
# Function names, looked up by the byte following an FF byte and listed in
# token byte order (0x81 to 0xB0).
@tokens_prefixed_with_FF = {
  0x81 => 'LEFT$', 0x82 => 'RIGHT$', 0x83 => 'MID$', 0x84 => 'SGN',
  0x85 => 'INT', 0x86 => 'ABS', 0x87 => 'SQR', 0x88 => 'RND',
  0x89 => 'SIN', 0x8A => 'LOG', 0x8B => 'EXP', 0x8C => 'COS',
  0x8D => 'TAN', 0x8E => 'ATN', 0x8F => 'FRE', 0x90 => 'INP',
  0x91 => 'POS', 0x92 => 'LEN', 0x93 => 'STR$', 0x94 => 'VAL',
  0x95 => 'ASC', 0x96 => 'CHR$', 0x97 => 'PEEK', 0x98 => 'VPEEK',
  0x99 => 'SPACE$', 0x9A => 'OCT$', 0x9B => 'HEX$', 0x9C => 'LPOS',
  0x9D => 'BIN$', 0x9E => 'CINT', 0x9F => 'CSNG', 0xA0 => 'CDBL',
  0xA1 => 'FIX', 0xA2 => 'STICK', 0xA3 => 'STRIG', 0xA4 => 'PDL',
  0xA5 => 'PAD', 0xA6 => 'DSKF', 0xA7 => 'FPOS', 0xA8 => 'CVI',
  0xA9 => 'CVS', 0xAA => 'CVD', 0xAB => 'EOF', 0xAC => 'LOC',
  0xAD => 'LOF', 0xAE => 'MKI$', 0xAF => 'MKS$', 0xB0 => 'MKD$'
}
# Run the parser only when this file is executed directly, so that it can
# also be loaded (e.g. from a test) without reading ARGV or exiting.
main if __FILE__ == $PROGRAM_NAME
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment