marijnh/parselisp.js

## parselisp.js
// Parses a small subset of Common Lisp as used in AGraph Prolog queries.

LispParser = Editor.Parser = (function() {
  /**
   * Builds a case-insensitive regular expression that matches a string only
   * when it is exactly one of the given words.
   *
   * Every word is escaped before being joined, so words that contain regular
   * expression metacharacters (for example "+" or "a.b") are matched
   * literally instead of corrupting the pattern.
   *
   * @param {string[]} words - The literal words to accept.
   * @returns {RegExp} An anchored, case-insensitive alternation of the words.
   */
  function wordRegexp(words) {
    var escaped = [];
    for (var i = 0; i < words.length; i++) {
      // "\\$&" re-inserts the matched metacharacter behind a backslash.
      escaped.push(String(words[i]).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    }
    return new RegExp("^(?:" + escaped.join("|") + ")$", "i");
  }
  // Matches one character that may continue a symbol: anything other than
  // whitespace (including U+00A0), parentheses, "#", "!" or "<".
  var symbolChars = /[^\s\u00a0\(\)#!<]/;
  // Recognizes the operator names that are reported as "special" tokens;
  // wordRegexp makes the match whole-word and case-insensitive.
  var specialForms = wordRegexp([
    "select",
    "select-distinct",
    "select0-distinct",
    "select0",
    "<-",
    "<--"
  ]);

  /**
   * Creates the token stream for the Lisp subset.
   *
   * Wraps the external `tokenizer` helper (not defined in this section) with
   * the state functions declared below.
   *
   * @param {Object} source - Character stream; the state functions call its
   *     next, equals, endOfLine, nextWhile and get methods.
   * @param {Function} [startState] - State function to resume in; defaults to
   *     the normal (top-level) state.
   * @returns {Object} Whatever `tokenizer` returns for the source and state.
   */
  var tokenizeLisp = (function() {
    // Single characters that form a token on their own.
    var punctuation = /[\(\)!\^]/;
    // Characters accepted inside a !<...> reference, up to the closing ">".
    var uriChars = /[^\s\u00a0>]/;

    /**
     * Top-level state: reads one token and reports its style.
     *
     * @param {Object} source - Character stream positioned at a token start.
     * @param {Function} setState - Callback that installs the next state.
     * @returns {?(string|Object)} A style name, a {style, content} object for
     *     symbols, or null when a string was opened and inString must run.
     */
    function normal(source, setState) {
      var ch = source.next();
      // This must be tested before the punctuation check: "!" alone is
      // punctuation, so testing it later would make this branch unreachable.
      if (ch == "!" && source.equals("<")) {
        source.nextWhile(matcher(uriChars));
        if (source.equals(">")) source.next();
        return "uri";
      }
      if (punctuation.test(ch)) {
        return "punctuation";
      }
      if (ch == "/" && source.equals("/")) {
        // A "//" comment runs to the end of the current line.
        while (!source.endOfLine()) source.next();
        return "comment";
      }
      if (ch == "\"") {
        // Nothing is emitted for the opening quote; the string state
        // produces the token.
        setState(inString);
        return null;
      }
      source.nextWhile(matcher(symbolChars));
      if (ch == "?") return "var";
      var word = source.get();
      return {style: specialForms.test(word) ? "special" : "symbol", content: word};
    }

    /**
     * String state: consumes characters up to the closing quote or the end
     * of the line. The state only returns to normal once an unescaped quote
     * is seen, so a string may continue on the next line.
     *
     * @param {Object} source - Character stream positioned inside a string.
     * @param {Function} setState - Callback that installs the next state.
     * @returns {string} Always "string".
     */
    function inString(source, setState) {
      var escaped = false;
      while (!source.endOfLine()) {
        var ch = source.next();
        if (ch == "\"" && !escaped) {
          setState(normal);
          break;
        }
        // A backslash only escapes the next character when it is not itself
        // escaped, so "\\" followed by a quote still ends the string.
        escaped = !escaped && ch == "\\";
      }
      return "string";
    }

    return function(source, startState) {
      return tokenizer(source, startState || normal);
    };
  })();

  /**
   * Produces the indentation callback attached to newline tokens.
   *
   * The callback reads the indent from the context object each time it is
   * called, so later changes to that object are reflected in its result.
   *
   * @param {Object} context - Parser context carrying an `indent` property.
   * @returns {Function} A zero-argument function returning context.indent.
   */
  function indentLisp(context) {
    function currentIndent() {
      return context.indent;
    }
    return currentIndent;
  }

  /**
   * Creates a parser over the given source that tracks parenthesis nesting
   * so that each newline token can report an indentation.
   *
   * @param {Object} source - Character stream handed to tokenizeLisp.
   * @returns {Object} An object with `next()`, which returns the next token
   *     (newline tokens gain an `indentation` function), and `copy()`, which
   *     returns a function that re-attaches the saved state to a new source.
   */
  function parseLisp(source) {
    var stream = tokenizeLisp(source);
    // Width of the text consumed so far on the current line.
    var column = 0;
    // Innermost open list; `typed` becomes true once its first real token
    // has fixed the indentation for that list.
    var scope = {indent: 0, prev: null, typed: null};

    function advance() {
      var token = stream.next();
      var style = token.style;
      var text = token.content;

      if (text == "\n") {
        token.indentation = indentLisp(scope);
        column = 0;
      } else {
        column += token.value.length;
      }

      if (text == "(") {
        // Open a nested list, provisionally indented just past the paren.
        scope = {prev: scope, indent: column, typed: false};
      } else if (text == ")") {
        // Never pop the outermost scope, even on an unbalanced ")".
        if (scope.prev) scope = scope.prev;
      } else if (scope.typed === false) {
        if (style == "special") {
          // Special forms indent their body one column past the paren.
          scope.typed = true;
          scope.indent += 1;
        } else if (!(style == "newline" || style == "comment" || style == "whitespace")) {
          // Other heads align following lines with the current column.
          scope.typed = true;
          scope.indent = column;
        }
      }
      return token;
    }

    function snapshot() {
      var savedScope = scope;
      var savedColumn = column;
      var savedState = stream.state;
      return function(newSource) {
        stream = tokenizeLisp(newSource, savedState);
        scope = savedScope;
        column = savedColumn;
        return parser;
      };
    }

    var parser = {next: advance, copy: snapshot};
    return parser;
  }

  return {make: parseLisp};
})();
	// Parses a small subset of Common Lisp as used in AGraph Prolog queries.

	LispParser = Editor.Parser = (function() {
	/**
	 * Builds a case-insensitive regular expression that matches a string only
	 * when it is exactly one of the given words.
	 *
	 * Every word is escaped before being joined, so words that contain regular
	 * expression metacharacters (for example "+" or "a.b") are matched
	 * literally instead of corrupting the pattern.
	 *
	 * @param {string[]} words - The literal words to accept.
	 * @returns {RegExp} An anchored, case-insensitive alternation of the words.
	 */
	function wordRegexp(words) {
		var escaped = [];
		for (var i = 0; i < words.length; i++) {
			// "\\$&" re-inserts the matched metacharacter behind a backslash.
			escaped.push(String(words[i]).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
		}
		return new RegExp("^(?:" + escaped.join("|") + ")$", "i");
	}
	// Matches one character that may continue a symbol: anything other than
	// whitespace (including U+00A0), parentheses, "#", "!" or "<".
	var symbolChars = /[^\s\u00a0\(\)#!<]/;
	// Recognizes the operator names that are reported as "special" tokens;
	// wordRegexp makes the match whole-word and case-insensitive.
	var specialForms = wordRegexp([
		"select",
		"select-distinct",
		"select0-distinct",
		"select0",
		"<-",
		"<--"
	]);

	/**
	 * Creates the token stream for the Lisp subset.
	 *
	 * Wraps the external `tokenizer` helper (not defined in this section) with
	 * the state functions declared below.
	 *
	 * @param {Object} source - Character stream; the state functions call its
	 *     next, equals, endOfLine, nextWhile and get methods.
	 * @param {Function} [startState] - State function to resume in; defaults to
	 *     the normal (top-level) state.
	 * @returns {Object} Whatever `tokenizer` returns for the source and state.
	 */
	var tokenizeLisp = (function() {
		// Single characters that form a token on their own.
		var punctuation = /[\(\)!\^]/;
		// Characters accepted inside a !<...> reference, up to the closing ">".
		var uriChars = /[^\s\u00a0>]/;

		/**
		 * Top-level state: reads one token and reports its style.
		 *
		 * @param {Object} source - Character stream positioned at a token start.
		 * @param {Function} setState - Callback that installs the next state.
		 * @returns {?(string|Object)} A style name, a {style, content} object for
		 *     symbols, or null when a string was opened and inString must run.
		 */
		function normal(source, setState) {
			var ch = source.next();
			// This must be tested before the punctuation check: "!" alone is
			// punctuation, so testing it later would make this branch unreachable.
			if (ch == "!" && source.equals("<")) {
				source.nextWhile(matcher(uriChars));
				if (source.equals(">")) source.next();
				return "uri";
			}
			if (punctuation.test(ch)) {
				return "punctuation";
			}
			if (ch == "/" && source.equals("/")) {
				// A "//" comment runs to the end of the current line.
				while (!source.endOfLine()) source.next();
				return "comment";
			}
			if (ch == "\"") {
				// Nothing is emitted for the opening quote; the string state
				// produces the token.
				setState(inString);
				return null;
			}
			source.nextWhile(matcher(symbolChars));
			if (ch == "?") return "var";
			var word = source.get();
			return {style: specialForms.test(word) ? "special" : "symbol", content: word};
		}

		/**
		 * String state: consumes characters up to the closing quote or the end
		 * of the line. The state only returns to normal once an unescaped quote
		 * is seen, so a string may continue on the next line.
		 *
		 * @param {Object} source - Character stream positioned inside a string.
		 * @param {Function} setState - Callback that installs the next state.
		 * @returns {string} Always "string".
		 */
		function inString(source, setState) {
			var escaped = false;
			while (!source.endOfLine()) {
				var ch = source.next();
				if (ch == "\"" && !escaped) {
					setState(normal);
					break;
				}
				// A backslash only escapes the next character when it is not itself
				// escaped, so "\\" followed by a quote still ends the string.
				escaped = !escaped && ch == "\\";
			}
			return "string";
		}

		return function(source, startState) {
			// The logical-or operator is restored here; the previous text had
			// stray backslashes that made this line a syntax error.
			return tokenizer(source, startState || normal);
		};
	})();

	/**
	 * Produces the indentation callback attached to newline tokens.
	 *
	 * The callback reads the indent from the context object each time it is
	 * called, so later changes to that object are reflected in its result.
	 *
	 * @param {Object} context - Parser context carrying an `indent` property.
	 * @returns {Function} A zero-argument function returning context.indent.
	 */
	function indentLisp(context) {
		function currentIndent() {
			return context.indent;
		}
		return currentIndent;
	}

	/**
	 * Creates a parser over the given source that tracks parenthesis nesting
	 * so that each newline token can report an indentation.
	 *
	 * @param {Object} source - Character stream handed to tokenizeLisp.
	 * @returns {Object} An object with `next()`, which returns the next token
	 *     (newline tokens gain an `indentation` function), and `copy()`, which
	 *     returns a function that re-attaches the saved state to a new source.
	 */
	function parseLisp(source) {
		var stream = tokenizeLisp(source);
		// Width of the text consumed so far on the current line.
		var column = 0;
		// Innermost open list; `typed` becomes true once its first real token
		// has fixed the indentation for that list.
		var scope = {indent: 0, prev: null, typed: null};

		function advance() {
			var token = stream.next();
			var style = token.style;
			var text = token.content;

			if (text == "\n") {
				token.indentation = indentLisp(scope);
				column = 0;
			} else {
				column += token.value.length;
			}

			if (text == "(") {
				// Open a nested list, provisionally indented just past the paren.
				scope = {prev: scope, indent: column, typed: false};
			} else if (text == ")") {
				// Never pop the outermost scope, even on an unbalanced ")".
				if (scope.prev) scope = scope.prev;
			} else if (scope.typed === false) {
				if (style == "special") {
					// Special forms indent their body one column past the paren.
					scope.typed = true;
					scope.indent += 1;
				} else if (!(style == "newline" || style == "comment" || style == "whitespace")) {
					// Other heads align following lines with the current column.
					scope.typed = true;
					scope.indent = column;
				}
			}
			return token;
		}

		function snapshot() {
			var savedScope = scope;
			var savedColumn = column;
			var savedState = stream.state;
			return function(newSource) {
				stream = tokenizeLisp(newSource, savedState);
				scope = savedScope;
				column = savedColumn;
				return parser;
			};
		}

		var parser = {next: advance, copy: snapshot};
		return parser;
	}

	return {make: parseLisp};
	})();