Last active
May 13, 2021 18:31
-
-
Save KvanTTT/d95579de257531a3cc15 to your computer and use it in GitHub Desktop.
ANTLR C# runtime code fragment for correctly parsing C# code that contains preprocessor directives (for the CSharp grammar in the official repository: https://github.com/antlr/grammars-v4/tree/master/csharp).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Two-stage parse of C# source that contains preprocessor directives.
//   Stage 1: lex everything, evaluate each directive with CSharpPreprocessorParser and
//            keep only the tokens that belong to code that is actually compiled.
//   Stage 2: feed the surviving code tokens to the regular CSharpParser.
// `sourceCode` (the text to parse) is expected to be in scope.
List<IToken> codeTokens = new List<IToken>();
List<IToken> commentTokens = new List<IToken>();

Lexer preprocessorLexer = new CSharpLexer(new AntlrInputStream(sourceCode));
// Collect all tokens with the lexer (CSharpLexer.g4).
var tokens = preprocessorLexer.GetAllTokens();

var directiveTokens = new List<IToken>();
var directiveTokenSource = new ListTokenSource(directiveTokens);
var directiveTokenStream = new CommonTokenStream(directiveTokenSource, CSharpLexer.DIRECTIVE);
CSharpPreprocessorParser preprocessorParser = new CSharpPreprocessorParser(directiveTokenStream);

int index = 0;
// True while the tokens being visited belong to code that is compiled (not excluded by a directive).
bool compiliedTokens = true;
while (index < tokens.Count)
{
    var token = tokens[index];
    if (token.Type == CSharpLexer.SHARP)
    {
        directiveTokens.Clear();
        int directiveTokenIndex = index + 1;
        // Collect all tokens of this directive: everything up to the end of the
        // directive line, the next '#', or the end of input.
        while (directiveTokenIndex < tokens.Count &&
               tokens[directiveTokenIndex].Type != CSharpLexer.Eof &&
               tokens[directiveTokenIndex].Type != CSharpLexer.DIRECTIVE_NEW_LINE &&
               tokens[directiveTokenIndex].Type != CSharpLexer.SHARP)
        {
            var directiveToken = tokens[directiveTokenIndex];
            if (directiveToken.Channel == CSharpLexer.COMMENTS_CHANNEL)
            {
                commentTokens.Add(directiveToken);
            }
            else if (directiveToken.Channel != Lexer.Hidden)
            {
                directiveTokens.Add(directiveToken);
            }
            directiveTokenIndex++;
        }

        // Re-point the preprocessor parser at the freshly collected directive tokens.
        directiveTokenSource = new ListTokenSource(directiveTokens);
        directiveTokenStream = new CommonTokenStream(directiveTokenSource, CSharpLexer.DIRECTIVE);
        preprocessorParser.SetInputStream(directiveTokenStream);
        preprocessorParser.Reset();

        // Parse the condition in the preprocessor directive (based on CSharpPreprocessorParser.g4).
        CSharpPreprocessorParser.Preprocessor_directiveContext directive = preprocessorParser.preprocessor_directive();
        // If true, the code that follows is valid and not ignored.
        // NOTE(review): re-enabling code after #endif relies entirely on the value the
        // grammar reports for that directive - confirm against CSharpPreprocessorParser.g4.
        compiliedTokens = directive.value;

        // Resume right after the directive; the trailing index++ below steps onto the terminator.
        index = directiveTokenIndex - 1;
    }
    else if (token.Channel == CSharpLexer.COMMENTS_CHANNEL)
    {
        commentTokens.Add(token); // Collect comment tokens (if required).
    }
    else if (token.Channel != Lexer.Hidden && token.Type != CSharpLexer.DIRECTIVE_NEW_LINE && compiliedTokens)
    {
        codeTokens.Add(token); // Collect code tokens.
    }
    index++;
}

// At the second stage the tokens are parsed in the usual way. Only the filtered code
// tokens are passed on; using the raw lexer output here would throw away the
// directive handling done above.
var codeTokenSource = new ListTokenSource(codeTokens);
var codeTokenStream = new CommonTokenStream(codeTokenSource);
CSharpParser parser = new CSharpParser(codeTokenStream);
// Parse the syntax tree (CSharpParser.g4).
var compilationUnit = parser.compilation_unit();
Also, this code looks wrong: `compiliedTokens` is never reset after a closing directive. For example, after `#endif`, `compiliedTokens` should be set back to true; otherwise the code that follows will never be added to the code tokens.
Simple example that doesn't work
// Minimal reproduction input: a conditional-compilation block inside a method body.
using System.Text;
namespace testns {
public class testcls {
public static void Main(string []args) {
// Exactly one of the two declarations below is meant to survive preprocessing,
// depending on whether DEBUG is defined.
#if DEBUG
int x = 2;
#else
int y = 10;
#endif
// The closing braces after #endif are the tokens reported as being dropped.
}
}
}
Works fine by adding this code:
// Parse the condition in the preprocessor directive (based on the CSharpPreprocessorParser.g4 grammar).
CSharpPreprocessorParser.Preprocessor_directiveContext directive = preprocessorParser.preprocessor_directive();
// If true, the code that follows is valid and not ignored.
compiliedTokens = directive.value;

// Name of the directive: the token right after '#'. It is trimmed once and the same
// value is used for every check below; an empty name is used when '#' is the last token.
String directiveStr = index + 1 < tokens.size() ? tokens.get(index + 1).getText().trim() : "";
boolean isDefine = "define".equals(directiveStr);
boolean isUndef = "undef".equals(directiveStr);

// These directives never exclude the code that follows them, so compilation is forced back on.
// NOTE(review): forcing true on "endif" ignores any enclosing excluded #if - confirm for nested conditionals.
if (isDefine
        || "line".equals(directiveStr)
        || "error".equals(directiveStr)
        || "warning".equals(directiveStr)
        || "endregion".equals(directiveStr)
        || "endif".equals(directiveStr)
        || "pragma".equals(directiveStr)) {
    compiliedTokens = true;
}

// Keep the parser's set of conditional symbols in sync with #define / #undef.
// The symbol is read from the token after the directive name; skipped when that token is missing.
// NOTE(review): assumes no hidden-channel token sits between the name and the symbol - verify against the lexer.
String conditionalSymbol = null;
if ((isDefine || isUndef) && index + 2 < tokens.size()) {
    conditionalSymbol = tokens.get(index + 2).getText();
    if (isDefine) {
        preprocessorParser.ConditionalSymbols.add(conditionalSymbol);
    } else {
        preprocessorParser.ConditionalSymbols.remove(conditionalSymbol);
    }
}

// Resume scanning right after the directive.
index = directiveTokenIndex - 1;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Shouldn't new ListTokenSource(tokens); be new ListTokenSource(codeTokens); ?