Skip to content

Instantly share code, notes, and snippets.

@KvanTTT
Last active May 13, 2021 18:31
Show Gist options
  • Save KvanTTT/d95579de257531a3cc15 to your computer and use it in GitHub Desktop.
Save KvanTTT/d95579de257531a3cc15 to your computer and use it in GitHub Desktop.
ANTLR C# runtime code fragment for correctly parsing C# code with preprocessor directives (for the CSharp grammar in the official repository: https://github.com/antlr/grammars-v4/tree/master/csharp).
// Two-stage parse of C# source that contains preprocessor directives:
//   1. Lex the whole input, evaluate every directive with CSharpPreprocessorParser and
//      keep only the tokens that are not inside an excluded region.
//   2. Parse the surviving tokens with CSharpParser.
// `sourceCode` (the text to parse) must be supplied by the surrounding code.
List<IToken> codeTokens = new List<IToken>();
List<IToken> commentTokens = new List<IToken>();
Lexer preprocessorLexer = new CSharpLexer(new AntlrInputStream(sourceCode));
// Collect all tokens with lexer (CSharpLexer.g4).
var tokens = preprocessorLexer.GetAllTokens();
var directiveTokens = new List<IToken>();
var directiveTokenSource = new ListTokenSource(directiveTokens);
var directiveTokenStream = new CommonTokenStream(directiveTokenSource, CSharpLexer.DIRECTIVE);
CSharpPreprocessorParser preprocessorParser = new CSharpPreprocessorParser(directiveTokenStream);
int index = 0;
// True while the tokens being scanned belong to code that should be compiled.
bool compiliedTokens = true;
while (index < tokens.Count)
{
    var token = tokens[index];
    if (token.Type == CSharpLexer.SHARP)
    {
        directiveTokens.Clear();
        int directiveTokenIndex = index + 1;
        // Collect all preprocessor directive tokens: everything after '#' up to
        // (but not including) the end of the directive line, the next '#', or EOF.
        while (directiveTokenIndex < tokens.Count &&
               tokens[directiveTokenIndex].Type != CSharpLexer.Eof &&
               tokens[directiveTokenIndex].Type != CSharpLexer.DIRECTIVE_NEW_LINE &&
               tokens[directiveTokenIndex].Type != CSharpLexer.SHARP)
        {
            if (tokens[directiveTokenIndex].Channel == CSharpLexer.COMMENTS_CHANNEL)
            {
                commentTokens.Add(tokens[directiveTokenIndex]);
            }
            else if (tokens[directiveTokenIndex].Channel != Lexer.Hidden)
            {
                directiveTokens.Add(tokens[directiveTokenIndex]);
            }
            directiveTokenIndex++;
        }

        // Re-point the (reused) preprocessor parser at the tokens of this directive.
        directiveTokenSource = new ListTokenSource(directiveTokens);
        directiveTokenStream = new CommonTokenStream(directiveTokenSource, CSharpLexer.DIRECTIVE);
        preprocessorParser.SetInputStream(directiveTokenStream);
        preprocessorParser.Reset();
        // Parse condition in preprocessor directive (based on CSharpPreprocessorParser.g4 grammar).
        CSharpPreprocessorParser.Preprocessor_directiveContext directive = preprocessorParser.preprocessor_directive();
        // If true, then the code that follows is valid and not ignored.
        // NOTE(review): this relies entirely on the grammar's `value` result for
        // directives such as #else/#endif - confirm against the grammar version in use.
        compiliedTokens = directive.value;
        // Step back one token so that the index++ below lands on the token that
        // terminated the directive.
        index = directiveTokenIndex - 1;
    }
    else if (token.Channel == CSharpLexer.COMMENTS_CHANNEL)
    {
        commentTokens.Add(token); // Collect comment tokens (if required).
    }
    else if (token.Channel != Lexer.Hidden && token.Type != CSharpLexer.DIRECTIVE_NEW_LINE && compiliedTokens)
    {
        codeTokens.Add(token); // Collect code tokens.
    }
    index++;
}
// At the second stage the tokens are parsed in the usual way.
// Only the filtered code tokens are parsed; passing the raw `tokens` list here would
// feed directive tokens and excluded regions to CSharpParser.
var codeTokenSource = new ListTokenSource(codeTokens);
var codeTokenStream = new CommonTokenStream(codeTokenSource);
CSharpParser parser = new CSharpParser(codeTokenStream);
// Parse syntax tree (CSharpParser.g4)
var compilationUnit = parser.compilation_unit();
@attodorov
Copy link

Shouldn't new ListTokenSource(tokens); be new ListTokenSource(codeTokens); ?

@attodorov
Copy link

Also, this code looks wrong. You never reset compiliedTokens after #endif. For example, after an #endif, compiliedTokens should be set back to true; otherwise the code that follows it will never be considered part of the code tokens.

@attodorov
Copy link

attodorov commented May 24, 2018

Simple example that doesn't work

using System.Text;  
namespace testns {
public class testcls {
                public static void Main(string []args) {
        #if DEBUG
                int x = 2;
        #else
                int y  = 10;
        #endif
                }
        }
}

@attodorov
Copy link

attodorov commented May 24, 2018

Works fine by adding this code:

      // Parse condition in preprocessor directive (based on CSharpPreprocessorParser.g4 grammar).
		        CSharpPreprocessorParser.Preprocessor_directiveContext directive = preprocessorParser.preprocessor_directive();
		        // If true, then the code that follows is valid and not ignored.
		        compiliedTokens = directive.value;
		        // Text of the token right after '#', i.e. the directive keyword.
		        String directiveStr = tokens.get(index+1).getText().trim();
		    	// For these directives, override the parser result and resume collecting code tokens.
		    	// NOTE(review): this also re-enables collection when the directive sits inside an
		    	// excluded #if region - confirm that is intended.
		    	if ("line".equals(directiveStr) || "error".equals(directiveStr) || "warning".equals(directiveStr) || "define".equals(directiveStr) || "endregion".equals(directiveStr) || "endif".equals(directiveStr) || "pragma".equals(directiveStr)) {
		    		compiliedTokens = true;
		    	} 
		    	String conditionalSymbol = null;
		    	// NOTE(review): unlike directiveStr above, the two checks below compare the untrimmed token text.
		    	if ("define".equals(tokens.get(index+1).getText())) {
		    		// #define: add the token after the keyword to the conditional symbols.
		    		conditionalSymbol = tokens.get(index + 2).getText();
		    		preprocessorParser.ConditionalSymbols.add(conditionalSymbol);
		    	} 
		    	if ("undef".equals(tokens.get(index+1).getText())) {
		    		// #undef: remove the token after the keyword from the conditional symbols.
		    		conditionalSymbol = tokens.get(index + 2).getText();
		    		preprocessorParser.ConditionalSymbols.remove(conditionalSymbol);
		    	}
		        // Continue scanning from the token that terminated the directive.
		        index = directiveTokenIndex - 1;

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment