//
// Copyright (c) 2010-2023 Antmicro
//
// This file is licensed under the MIT License.
// Full license text is available in 'licenses/MIT.txt'.
//
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace Antmicro.Renode.PlatformDescription
{
    /// <summary>
    /// Line-based preprocessing pass: joins multiline (''') strings, blanks out comments and
    /// translates indentation changes into explicit '{', '}' and ';' characters appended to lines.
    /// </summary>
    public static class PreLexer
    {
        /// <summary>
        /// Processes <paramref name="source"/> line by line and lazily yields the decorated lines.
        /// A line followed by a deeper indented line gets '{' appended (one per indent level),
        /// a line followed by a shallower one gets '}' (one per level) and ';', and a line followed
        /// by an equally indented one gets ';'. While the input is inside explicit braces,
        /// lines are passed through unchanged. Empty (or whitespace-only) lines are emitted as-is or as "".
        /// </summary>
        /// <param name="source">Whole input text; '\r' characters are dropped before splitting on '\n'.</param>
        /// <param name="path">Optional file path used only in error messages.</param>
        /// <returns>Lazily evaluated sequence of output lines; errors surface during enumeration.</returns>
        /// <exception cref="ParsingException">
        /// On wrong indentation, on errors reported by the comment/string handling, or when the
        /// produced output has unbalanced braces although the input was balanced.
        /// </exception>
        public static IEnumerable<string> Process(string source, string path = "")
        {
            // We remove '\r' so that we don't have to worry about line endings.
            var sourceInLines = source.Replace("\r", string.Empty).Split(new[] { '\n' }, StringSplitOptions.None);
            var lineSource = HandleMultilineStrings(sourceInLines, path);
            var inputBraceLevel = 0;
            var outputBraceLevel = 0;

            var lineNo = -1;
            using(var enumerator = HandleComments(lineSource, path).GetEnumerator())
            {
                var started = false; // will change to true if the file is not empty (or has only empty lines)
                while(enumerator.MoveNext())
                {
                    lineNo++;
                    if(!string.IsNullOrWhiteSpace(enumerator.Current))
                    {
                        started = true;
                        break;
                    }
                    yield return enumerator.Current;
                }
                if(!started)
                {
                    yield break;
                }

                var oldLine = enumerator.Current;
                var oldIndentLevel = GetIndentLevel(oldLine, lineNo, path);

                if(oldIndentLevel != 0)
                {
                    throw GetException(ParsingError.WrongIndent, lineNo, 0, oldLine, "First line with text cannot be indented.", path);
                }

                if(!enumerator.MoveNext())
                {
                    // the only line with text is emitted undecorated
                    yield return oldLine;
                    yield break;
                }

                var numberOfEmptyLines = 0;

                do
                {
                    // lineNo always tracks the index of enumerator.Current
                    lineNo++;
                    AccountBraceLevel(oldLine, ref inputBraceLevel);
                    var newLine = enumerator.Current;

                    // pass through all empty lines
                    while(string.IsNullOrWhiteSpace(newLine))
                    {
                        numberOfEmptyLines++;
                        if(!enumerator.MoveNext())
                        {
                            goto finish;
                        }
                        // skipped lines must be counted too, otherwise indent errors
                        // reported below would point at a too early line
                        lineNo++;
                        newLine = enumerator.Current;
                    }

                    if(inputBraceLevel > 0)
                    {
                        // inside explicit braces indentation is not significant
                        AccountBraceLevel(oldLine, ref outputBraceLevel);
                        yield return oldLine;
                    }
                    else
                    {
                        var newIndentLevel = GetIndentLevel(newLine, lineNo, path);
                        var result = DecorateLineIfNecessary(oldLine, oldIndentLevel, newIndentLevel, false);
                        yield return result;
                        AccountBraceLevel(result, ref outputBraceLevel);
                        oldIndentLevel = newIndentLevel;
                    }

                    // empty lines are emitted after the (possibly decorated) line that preceded them
                    for(var i = 0; i < numberOfEmptyLines; i++)
                    {
                        yield return "";
                    }
                    numberOfEmptyLines = 0;

                    oldLine = newLine;
                }
                while(enumerator.MoveNext());
            finish:
                AccountBraceLevel(oldLine, ref inputBraceLevel);
                // the last line closes all still open indent levels, without a trailing semicolon
                oldLine = DecorateLineIfNecessary(oldLine, oldIndentLevel, 0, true);
                yield return oldLine;
                AccountBraceLevel(oldLine, ref outputBraceLevel);
                if(inputBraceLevel == 0 && outputBraceLevel != 0)
                {
                    // we only check output brace level if input was balanced, otherwise it does not make sense
                    throw new ParsingException(ParsingError.InternalPrelexerError, "Internal prelexer error, unbalanced output with balanced input.");
                }

                for(var i = 0; i < numberOfEmptyLines; i++)
                {
                    yield return "";
                }
            }
        }

        /// <summary>
        /// Removes comments from the given lines, yielding exactly one output element per input element.
        /// Single line comments ("//") and unterminated "/*" comments truncate the line, lines fully inside
        /// a multiline comment become empty, and "/* ... */" regions closed on the same line are
        /// overwritten with spaces. Quoted ("...") strings are skipped so that comment markers
        /// inside them are left alone.
        /// </summary>
        /// <exception cref="ParsingException">
        /// On an unterminated string, or when a multiline comment is placed in a way that is not
        /// allowed outside of braces (see the messages below).
        /// </exception>
        private static IEnumerable<string> HandleComments(IEnumerable<string> lineSource, string path)
        {
            var inMultilineComment = false;
            var localBraceLevel = 0;
            var lineNo = -1;
            foreach(var originalLine in lineSource)
            {
                lineNo++;
                var line = originalLine;
                var currentIndex = 0;
                if(inMultilineComment)
                {
                    var closingIndex = line.IndexOf("*/", StringComparison.InvariantCulture);
                    if(closingIndex == -1)
                    {
                        yield return string.Empty; // no need to adjust brace level
                        continue;
                    }
                    if(localBraceLevel == 0 && closingIndex != line.TrimEnd().Length - 2)
                    {
                        throw GetException(ParsingError.SyntaxError, lineNo, closingIndex, originalLine, "Multiline comment in indent mode can only finish at the end of the line.", path);
                    }
                    // blank out everything up to and including the closing "*/"
                    var newLine = new StringBuilder(line);
                    for(var i = 0; i <= closingIndex + 1; i++)
                    {
                        newLine[i] = ' ';
                    }
                    line = newLine.ToString();
                    inMultilineComment = false;
                }
                while(true)
                {
                    switch(FindInLine(line, ref currentIndex))
                    {
                    case FindResult.Nothing:
                        AccountBraceLevel(line, ref localBraceLevel);
                        yield return line;
                        goto next;
                    case FindResult.SingleLineCommentStart:
                        // currentIndex points at the second '/', so cut just before the first one
                        line = line.Substring(0, currentIndex - 1);
                        AccountBraceLevel(line, ref localBraceLevel);
                        yield return line;
                        goto next;
                    case FindResult.StringStart:
                        currentIndex++;
                        while(true)
                        {
                            currentIndex = line.IndexOf('"', currentIndex) + 1;
                            if(currentIndex == 0) // means that IndexOf returned -1
                            {
                                throw GetException(ParsingError.SyntaxError, lineNo, originalLine.Length - 1, originalLine, "Unterminated string.", path);
                            }
                            // if this is escaped quote, just ignore it
                            if(!IsEscapedPosition(line, currentIndex - 1))
                            {
                                break;
                            }
                        }
                        break;
                    case FindResult.MultilineCommentStart:
                        // currentIndex points at the '*' of "/*"; nextIndex is the first index after "*/"
                        var nextIndex = line.IndexOf("*/", currentIndex + 1, StringComparison.InvariantCulture) + 2;
                        if(nextIndex == 1) // means that IndexOf returned -1
                        {
                            inMultilineComment = true;
                            line = line.Substring(0, currentIndex - 1);
                            AccountBraceLevel(line, ref localBraceLevel);
                            yield return line;
                            goto next;
                        }
                        if(localBraceLevel == 0)
                        {
                            if(line.Length - line.TrimStart().Length + 1 == currentIndex && // comment is the first meaningful thing in line
                               line.TrimEnd().Length != nextIndex) // but not the last one
                            {
                                throw GetException(ParsingError.SyntaxError, lineNo, currentIndex, originalLine,
                                                   "Single line multiline comment in indent mode cannot be the first non-whitespace element of a line if it is does not span to the end of the line.",
                                                   path);
                            }
                        }
                        // overwrite the whole "/* ... */" region with spaces, keeping column positions intact
                        var newLine = new StringBuilder(line);
                        for(var i = currentIndex - 1; i < nextIndex; i++)
                        {
                            newLine[i] = ' ';
                        }
                        line = newLine.ToString();
                        currentIndex = nextIndex;
                        break;
                    }
                }
            next:;
            }
        }

        /// <summary>
        /// Scans <paramref name="line"/> starting at <paramref name="currentIndex"/> for the next string
        /// or comment start. On a hit, <paramref name="currentIndex"/> is left at the opening '"',
        /// at the '*' of "/*", or at the second '/' of "//". A "//" is treated as a comment only
        /// when it starts the line or is preceded by a space.
        /// </summary>
        /// <returns>Kind of the element found, or <see cref="FindResult.Nothing"/> when the end of line was reached.</returns>
        private static FindResult FindInLine(string line, ref int currentIndex)
        {
            for(; currentIndex < line.Length; currentIndex++)
            {
                switch(line[currentIndex])
                {
                case '"':
                    return FindResult.StringStart;
                case '/':
                    if(line.Length > currentIndex + 1)
                    {
                        // NOTE(review): the character following a lone '/' is consumed here and not examined
                        // again by the loop, so e.g. a '"' placed directly after '/' is not reported as a string start.
                        currentIndex++;
                        var nextChar = line[currentIndex];
                        if(nextChar == '*')
                        {
                            return FindResult.MultilineCommentStart;
                        }
                        else if(nextChar == '/')
                        {
                            if(currentIndex == 1 || (currentIndex >= 2 && line[currentIndex - 2] == ' '))
                            {
                                return FindResult.SingleLineCommentStart;
                            }
                        }
                    }
                    break;
                }
            }
            return FindResult.Nothing;
        }

        /// <summary>
        /// Joins lines belonging to one multiline string (delimited with ''') into a single element,
        /// separated with '\n'. A line with exactly one unescaped delimiter opens a multiline string;
        /// the next line containing at least one unescaped delimiter closes it. All other lines are
        /// yielded unchanged.
        /// </summary>
        /// <param name="sourceLine">Input lines.</param>
        /// <param name="path">Optional file path used only in error messages.</param>
        /// <exception cref="ParsingException">When an opened multiline string is never closed.</exception>
        public static IEnumerable<string> HandleMultilineStrings(IEnumerable<string> sourceLine, string path)
        {
            const string quoteDelimiter = "'''";
            var multilineString = new List<string>();
            var inMultilineString = false;
            var regexCharacterToFind = new Regex(Regex.Escape(quoteDelimiter), RegexOptions.None);
            // (line index, line text, index reported by CountUnescapedCharacters) of the opening delimiter
            var openingQuoteLine = new Tuple<int, string, int>(-1, "", -1);

            foreach(var currentLine in sourceLine.Select((value, index) => new { index, value }))
            {
                int lastQuoteIndex = -1;
                var line = currentLine.value;
                var validQuotes = CountUnescapedCharacters(line, regexCharacterToFind, out lastQuoteIndex);

                if(inMultilineString)
                {
                    multilineString.Add(line);
                    if(validQuotes >= 1)
                    {
                        inMultilineString = false;
                        var str = string.Join("\n", multilineString);
                        multilineString.Clear();
                        yield return str;
                    }
                }
                else
                {
                    if(validQuotes != 1) // we have opening and closing quote (or more) or no quotes at all
                    {
                        yield return line;
                        continue;
                    }
                    openingQuoteLine = new Tuple<int, string, int>(currentLine.index, currentLine.value, lastQuoteIndex);
                    inMultilineString = true;
                    multilineString.Add(line);
                }
            }

            if(inMultilineString) //if closing quote was never found
            {
                var errorLine = openingQuoteLine.Item2;
                // CountUnescapedCharacters reports the index of the character *preceding* the delimiter
                // (-1 for a delimiter at the very beginning of the line), hence the correction by one
                var errorQuoteIndex = openingQuoteLine.Item3 + 1;
                throw GetException(ParsingError.SyntaxError, openingQuoteLine.Item1, errorQuoteIndex, errorLine,
                                   "Unclosed multiline string", path);
            }
        }

        /// <summary>
        /// Counts matches of <paramref name="regexCharacterToFind"/> in <paramref name="line"/> that are not
        /// escaped, i.e. are preceded by an even number (including zero) of <paramref name="escapeCharacter"/>.
        /// </summary>
        /// <param name="line">Text to search.</param>
        /// <param name="regexCharacterToFind">Pattern of the element to count.</param>
        /// <param name="lastQuoteIndex">
        /// Index of the character immediately preceding the last unescaped match. It is -1 both when
        /// there is no unescaped match and when the last unescaped match starts at index 0.
        /// </param>
        /// <param name="escapeCharacter">Character that escapes a match when repeated an odd number of times before it.</param>
        /// <returns>Number of unescaped matches.</returns>
        public static int CountUnescapedCharacters(string line, Regex regexCharacterToFind, out int lastQuoteIndex, char escapeCharacter = '\\')
        {
            var matches = regexCharacterToFind.Matches(line);
            var validQuotesCount = 0;
            var validQuotesIndexes = new List<int>();

            foreach(Match ma in matches)
            {
                var index = ma.Index - 1;
                var startIndex = index;
                var backslashCount = 0;

                // count escape characters directly preceding the match
                while(index >= 0 && line[index] == escapeCharacter)
                {
                    backslashCount++;
                    index--;
                }

                if(backslashCount % 2 == 0)
                {
                    validQuotesCount++;
                    validQuotesIndexes.Add(startIndex);
                }
            }
            lastQuoteIndex = (validQuotesIndexes.Count != 0) ? validQuotesIndexes[validQuotesIndexes.Count - 1] : -1;
            return validQuotesCount;
        }

        /// <summary>
        /// Appends to <paramref name="line"/> the characters implied by an indentation change:
        /// '{' for every level gained, '}' for every level lost (followed by ';' unless
        /// <paramref name="doNotInsertSemicolon"/> is set), or a single ';' when the level stays the same.
        /// Empty or whitespace-only lines with unchanged level are returned untouched.
        /// </summary>
        private static string DecorateLineIfNecessary(string line, int oldIndentLevel, int newIndentLevel, bool doNotInsertSemicolon)
        {
            var builder = new StringBuilder(line);
            if(newIndentLevel > oldIndentLevel)
            {
                return builder.Append('{', newIndentLevel - oldIndentLevel).ToString();
            }
            if(newIndentLevel < oldIndentLevel)
            {
                builder.Append('}', oldIndentLevel - newIndentLevel);
                if(!doNotInsertSemicolon)
                {
                    builder.Append(';');
                }
                return builder.ToString();
            }
            if(string.IsNullOrWhiteSpace(line))
            {
                return line;
            }
            return doNotInsertSemicolon ? line : line + ';';
        }

        /// <summary>
        /// Builds (but does not throw) a <see cref="ParsingException"/> whose message contains the error code,
        /// the location (1-based line and column, prefixed with <paramref name="path"/> if not empty),
        /// the offending line and a caret placed under the reported column.
        /// </summary>
        /// <param name="lineNo">0-based line index.</param>
        /// <param name="columnNo">0-based column index; negative values are treated as 0.</param>
        private static ParsingException GetException(ParsingError error, int lineNo, int columnNo, string line, string message, string path)
        {
            // a negative count would make the string constructor throw ArgumentOutOfRangeException
            // and hide the actual parsing error
            var caretColumn = Math.Max(columnNo, 0);
            message = string.Format("Error E{0:D2}: ", (int)error) + message + Environment.NewLine +
                string.Format("At {0}{1}:{2}:", path == "" ? "" : path + ':', lineNo + 1, caretColumn + 1) + Environment.NewLine +
                line + Environment.NewLine + new string(' ', caretColumn) + "^";
            return new ParsingException(error, message);
        }

        /// <summary>
        /// Returns the indent level of <paramref name="line"/>: the number of leading spaces
        /// divided by <see cref="SpacesPerIndent"/>.
        /// </summary>
        /// <exception cref="ParsingException">When the number of leading spaces is not a multiple of <see cref="SpacesPerIndent"/>.</exception>
        private static int GetIndentLevel(string line, int lineNo, string path)
        {
            var spacesNo = line.TakeWhile(x => x == ' ').Count();
            if((spacesNo % SpacesPerIndent) != 0)
            {
                throw GetException(ParsingError.WrongIndent, lineNo, spacesNo - 1, line,
                                   string.Format("Indent's length has to be multiple of {0}, this one is {1} spaces long.", SpacesPerIndent, spacesNo), path);
            }
            return spacesNo / SpacesPerIndent;
        }

        /// <summary>
        /// Adjusts <paramref name="braceLevel"/> by +1 for every '{' and -1 for every '}' found in
        /// <paramref name="line"/> outside of quoted ("...") strings.
        /// </summary>
        private static void AccountBraceLevel(string line, ref int braceLevel)
        {
            // we have to not take braces inside string into account; comments are already removed
            var inString = false;
            for(var i = 0; i < line.Length; i++)
            {
                var element = line[i];
                if(!inString)
                {
                    braceLevel += element == '{' ? 1 : element == '}' ? -1 : 0;
                }
                if(line[i] == '"' && !IsEscapedPosition(line, i))
                {
                    inString = !inString;
                }
            }
        }

        /// <summary>
        /// Tells whether the character at <paramref name="position"/> is escaped, i.e. directly
        /// preceded by an odd number of backslashes.
        /// </summary>
        private static bool IsEscapedPosition(string str, int position)
        {
            int numEscapes = 0;
            while(position - 1 - numEscapes >= 0 && str[position - 1 - numEscapes] == '\\')
            {
                numEscapes++;
            }
            // if there's an odd number of backslashes before this position, it is escaped
            return numEscapes % 2 == 1;
        }

        private const int SpacesPerIndent = 4;

        // Kinds of elements FindInLine can stop at.
        private enum FindResult
        {
            Nothing,
            StringStart,
            MultilineCommentStart,
            SingleLineCommentStart
        }
    }
}