1 //
2 // Copyright (c) 2010-2023 Antmicro
3 //
4 // This file is licensed under the MIT License.
5 // Full license text is available in 'licenses/MIT.txt'.
6 //
7 using System;
8 using System.Collections.Generic;
9 using System.Linq;
10 using System.Text;
11 using System.Text.RegularExpressions;
12 
13 namespace Antmicro.Renode.PlatformDescription
14 {
15     public static class PreLexer
16     {
Process(string source, string path = R)17         public static IEnumerable<string> Process(string source, string path = "")
18         {
19             // We remove '\r' so that we don't have to worry about line endings.
20             var sourceInLines = source.Replace("\r", string.Empty).Split(new[] { '\n' }, StringSplitOptions.None);
21             var lineSource = HandleMultilineStrings(sourceInLines, path);
22             var inputBraceLevel = 0;
23             var outputBraceLevel = 0;
24 
25             var lineNo = -1;
26             var enumerator = HandleComments(lineSource, path).GetEnumerator();
27             var started = false; // will change to true if the file is not empty (or has only empty lines)
28             while(enumerator.MoveNext())
29             {
30                 lineNo++;
31                 if(!string.IsNullOrWhiteSpace(enumerator.Current))
32                 {
33                     started = true;
34                     break;
35                 }
36                 yield return enumerator.Current;
37             }
38             if(!started)
39             {
40                 yield break;
41             }
42 
43             var oldLine = enumerator.Current;
44             var oldIndentLevel = GetIndentLevel(oldLine, lineNo, path);
45 
46             if(oldIndentLevel != 0)
47             {
48                 throw GetException(ParsingError.WrongIndent, lineNo, 0, oldLine, "First line with text cannot be indented.", path);
49             }
50 
51             if(!enumerator.MoveNext())
52             {
53                 yield return oldLine;
54                 AccountBraceLevel(oldLine, ref outputBraceLevel);
55                 yield break;
56             }
57 
58             var numberOfEmptyLines = 0;
59 
60             do
61             {
62                 lineNo++;
63                 AccountBraceLevel(oldLine, ref inputBraceLevel);
64                 var newLine = enumerator.Current;
65 
66                 // pass through all empty lines
67                 while(string.IsNullOrWhiteSpace(newLine))
68                 {
69                     numberOfEmptyLines++;
70                     if(!enumerator.MoveNext())
71                     {
72                         goto finish;
73                     }
74                     newLine = enumerator.Current;
75                 }
76 
77                 if(inputBraceLevel > 0)
78                 {
79                     AccountBraceLevel(oldLine, ref outputBraceLevel);
80                     yield return oldLine;
81                 }
82                 else
83                 {
84                     var newIndentLevel = GetIndentLevel(newLine, lineNo, path);
85                     var result = DecorateLineIfNecessary(oldLine, oldIndentLevel, newIndentLevel, false);
86                     yield return result;
87                     AccountBraceLevel(result, ref outputBraceLevel);
88                     oldIndentLevel = newIndentLevel;
89                 }
90 
91                 for(var i = 0; i < numberOfEmptyLines; i++)
92                 {
93                     yield return "";
94                 }
95                 numberOfEmptyLines = 0;
96 
97                 oldLine = newLine;
98             }
99             while(enumerator.MoveNext());
100 finish:
101             AccountBraceLevel(oldLine, ref inputBraceLevel);
102             oldLine = DecorateLineIfNecessary(oldLine, oldIndentLevel, 0, true);
103             yield return oldLine;
104             AccountBraceLevel(oldLine, ref outputBraceLevel);
105             if(inputBraceLevel == 0 && outputBraceLevel != 0)
106             {
107                 // we only check output brace level if input was balanced, otherwise it does not make sense
108                 throw new ParsingException(ParsingError.InternalPrelexerError, "Internal prelexer error, unbalanced output with balanced input.");
109             }
110 
111             for(var i = 0; i < numberOfEmptyLines; i++)
112             {
113                 yield return "";
114             }
115         }
116 
HandleComments(IEnumerable<string> lineSource, string path)117         private static IEnumerable<string> HandleComments(IEnumerable<string> lineSource, string path)
118         {
119             var inMultilineComment = false;
120             var localBraceLevel = 0;
121             var lineNo = -1;
122             foreach(var originalLine in lineSource)
123             {
124                 lineNo++;
125                 var line = originalLine;
126                 var currentIndex = 0;
127                 if(inMultilineComment)
128                 {
129                     var closingIndex = line.IndexOf("*/", StringComparison.InvariantCulture);
130                     if(closingIndex == -1)
131                     {
132                         yield return string.Empty; // no need to adjust brace level
133                         continue;
134                     }
135                     if(localBraceLevel == 0 && closingIndex != line.TrimEnd().Length - 2)
136                     {
137                         throw GetException(ParsingError.SyntaxError, lineNo, closingIndex, originalLine, "Multiline comment in indent mode can only finish at the end of the line.", path);
138                     }
139                     var newLine = new StringBuilder(line);
140                     for(var i = 0; i <= closingIndex + 1; i++)
141                     {
142                         newLine[i] = ' ';
143                     }
144                     line = newLine.ToString();
145                     inMultilineComment = false;
146                 }
147                 while(true)
148                 {
149                     switch(FindInLine(line, ref currentIndex))
150                     {
151                     case FindResult.Nothing:
152                         AccountBraceLevel(line, ref localBraceLevel);
153                         yield return line;
154                         goto next;
155                     case FindResult.SingleLineCommentStart:
156                         line = line.Substring(0, currentIndex - 1);
157                         AccountBraceLevel(line, ref localBraceLevel);
158                         yield return line;
159                         goto next;
160                     case FindResult.StringStart:
161                         currentIndex++;
162                         while(true)
163                         {
164                             currentIndex = line.IndexOf('"', currentIndex) + 1;
165                             if(currentIndex == 0) // means that IndexOf returned -1
166                             {
167                                 throw GetException(ParsingError.SyntaxError, lineNo, originalLine.Length - 1, originalLine, "Unterminated string.", path);
168                             }
169                             // if this is escaped quote, just ignore it
170                             if(!IsEscapedPosition(line, currentIndex - 1))
171                             {
172                                 break;
173                             }
174                         }
175                         break;
176                     case FindResult.MultilineCommentStart:
177                         var nextIndex = line.IndexOf("*/", currentIndex + 1, StringComparison.InvariantCulture) + 2;
178                         if(nextIndex == 1) // means that IndexOf returned -1
179                         {
180                             inMultilineComment = true;
181                             line = line.Substring(0, currentIndex - 1);
182                             AccountBraceLevel(line, ref localBraceLevel);
183                             yield return line;
184                             goto next;
185                         }
186                         if(localBraceLevel == 0)
187                         {
188                             if(line.Length - line.TrimStart().Length  + 1 == currentIndex && // comment is the first meaningful thing in line
189                                line.TrimEnd().Length != nextIndex) // but not the last one
190                             {
191                                 throw GetException(ParsingError.SyntaxError, lineNo, currentIndex, originalLine,
192                                                    "Single line multiline comment in indent mode cannot be the first non-whitespace element of a line if it is does not span to the end of the line.",
193                                                    path);
194                             }
195                         }
196                         var newLine = new StringBuilder(line);
197                         for(var i = currentIndex - 1; i < nextIndex; i++)
198                         {
199                             newLine[i] = ' ';
200                         }
201                         line = newLine.ToString();
202                         currentIndex = nextIndex;
203                         break;
204                     }
205                 }
206             next:;
207             }
208         }
209 
FindInLine(string line, ref int currentIndex)210         private static FindResult FindInLine(string line, ref int currentIndex)
211         {
212             for(; currentIndex < line.Length; currentIndex++)
213             {
214                 switch(line[currentIndex])
215                 {
216                 case '"':
217                     return FindResult.StringStart;
218                 case '/':
219                     if(line.Length > currentIndex + 1)
220                     {
221                         currentIndex++;
222                         var nextChar = line[currentIndex];
223                         if(nextChar == '*')
224                         {
225                             return FindResult.MultilineCommentStart;
226                         }
227                         else if(nextChar == '/')
228                         {
229                             if(currentIndex == 1 || (currentIndex >= 2 && line[currentIndex - 2] == ' '))
230                             {
231                                 return FindResult.SingleLineCommentStart;
232                             }
233                         }
234                     }
235                     break;
236                 }
237             }
238             return FindResult.Nothing;
239         }
240 
HandleMultilineStrings(IEnumerable<string> sourceLine, string path)241         public static IEnumerable<string> HandleMultilineStrings(IEnumerable<string> sourceLine, string path)
242         {
243             const string quoteDelimiter = "'''";
244             var multilineString = new List<string>();
245             var inMultilineString = false;
246             var regexCharacterToFind = new Regex(Regex.Escape(quoteDelimiter), RegexOptions.None);
247             var openingQuoteLine = new Tuple<int, string, int>(-1, "", -1);
248 
249             foreach(var currentLine in sourceLine.Select((value, index) => new { index, value }))
250             {
251                 int lastQuoteIndex = -1;
252                 var line = currentLine.value;
253                 var validQuotes = CountUnescapedCharacters(line, regexCharacterToFind, out lastQuoteIndex);
254 
255                 if(inMultilineString)
256                 {
257                     multilineString.Add(line);
258                     if(validQuotes >= 1)
259                     {
260                         inMultilineString = false;
261                         var str = string.Join("\n", multilineString);
262                         multilineString.Clear();
263                         yield return str;
264                     }
265                 }
266                 else
267                 {
268                     if(validQuotes != 1) // we have opening and closing quote (or more) or no quotes at all
269                     {
270                         yield return line;
271                         continue;
272                     }
273                     openingQuoteLine = new Tuple<int, string, int>(currentLine.index, currentLine.value, lastQuoteIndex);
274                     inMultilineString = true;
275                     multilineString.Add(line);
276                 }
277             }
278 
279             if(inMultilineString) //if closing quote was never found
280             {
281                 var errorLine = openingQuoteLine.Item2;
282                 var errorQuoteIndex = openingQuoteLine.Item3;
283                 throw GetException(ParsingError.SyntaxError, openingQuoteLine.Item1, errorQuoteIndex, errorLine,
284                                    "Unclosed multiline string", path);
285             }
286         }
287 
CountUnescapedCharacters(string line, Regex regexCharacterToFind, out int lastQuoteIndex, char escapeCharacter = B)288         public static int CountUnescapedCharacters(string line, Regex regexCharacterToFind, out int lastQuoteIndex, char escapeCharacter = '\\')
289         {
290             var matches = regexCharacterToFind.Matches(line);
291             var validQuotesCount = 0;
292             var validQuotesIndexes = new List<int>();
293 
294             foreach(Match ma in matches)
295             {
296                 var index = ma.Index - 1;
297                 var startIndex = index;
298                 var backslashCount = 0;
299 
300                 while(index >= 0 && line[index] == escapeCharacter)
301                 {
302                     backslashCount++;
303                     index--;
304                 }
305 
306                 if(backslashCount % 2 == 0)
307                 {
308                     validQuotesCount++;
309                     validQuotesIndexes.Add(startIndex);
310                 }
311             }
312             lastQuoteIndex = (validQuotesIndexes.Count != 0) ? validQuotesIndexes[validQuotesIndexes.Count - 1] : -1;
313             return validQuotesCount;
314         }
315 
DecorateLineIfNecessary(string line, int oldIndentLevel, int newIndentLevel, bool doNotInsertSemicolon)316         private static string DecorateLineIfNecessary(string line, int oldIndentLevel, int newIndentLevel, bool doNotInsertSemicolon)
317         {
318             var builder = new StringBuilder(line);
319             if(newIndentLevel > oldIndentLevel)
320             {
321                 return builder.Append('{', newIndentLevel - oldIndentLevel).ToString();
322             }
323             if((newIndentLevel < oldIndentLevel))
324             {
325                 builder.Append('}', oldIndentLevel - newIndentLevel);
326                 if(!doNotInsertSemicolon)
327                 {
328                     builder.Append(';');
329                 }
330                 return builder.ToString();
331             }
332             if(string.IsNullOrWhiteSpace(line))
333             {
334                 return line;
335             }
336             return doNotInsertSemicolon ? line : line + ';';
337         }
338 
GetException(ParsingError error, int lineNo, int columnNo, string line, string message, string path)339         private static ParsingException GetException(ParsingError error, int lineNo, int columnNo, string line, string message, string path)
340         {
341             message = string.Format("Error E{0:D2}: ", (int)error) + message + Environment.NewLine +
342                                            string.Format("At {0}{1}:{2}:", path == "" ? "" : path + ':', lineNo + 1, columnNo + 1) + Environment.NewLine +
343                                            line + Environment.NewLine + new string(' ', columnNo) + "^";
344             throw new ParsingException(error, message);
345 
346         }
347 
GetIndentLevel(string line, int lineNo, string path)348         private static int GetIndentLevel(string line, int lineNo, string path)
349         {
350             var spacesNo = line.TakeWhile(x => x == ' ').Count();
351             if((spacesNo % SpacesPerIndent) != 0)
352             {
353                 throw GetException(ParsingError.WrongIndent, lineNo, spacesNo - 1, line,
354                                    string.Format("Indent's length has to be multiple of {0}, this one is {1} spaces long.", SpacesPerIndent, spacesNo), path);
355             }
356             return spacesNo / SpacesPerIndent;
357         }
358 
AccountBraceLevel(string line, ref int braceLevel)359         private static void AccountBraceLevel(string line, ref int braceLevel)
360         {
361             // we have to not take braces inside string into account; comments are already removed
362             var inString = false;
363             for(var i = 0; i < line.Length; i++)
364             {
365                 var element = line[i];
366                 if(!inString)
367                 {
368                     braceLevel += element == '{' ? 1 : element == '}' ? -1 : 0;
369                 }
370                 if(line[i] == '"' && !IsEscapedPosition(line, i))
371                 {
372                     inString = !inString;
373                 }
374             }
375         }
376 
IsEscapedPosition(string str, int position)377         private static bool IsEscapedPosition(string str, int position)
378         {
379             int numEscapes = 0;
380             while(position - 1 - numEscapes >= 0 && str[position - 1 - numEscapes] == '\\')
381             {
382                 numEscapes++;
383             }
384             // if there's an odd number of backslashes before this position, it is escaped
385             return numEscapes % 2 == 1;
386         }
387 
        // Number of leading spaces that make up a single indent level; GetIndentLevel rejects
        // indents whose length is not a multiple of this value.
        private const int SpacesPerIndent = 4;
389 
        // Kind of element reported by FindInLine.
        private enum FindResult
        {
            // the end of the line was reached without finding anything
            Nothing,
            // a double quote character
            StringStart,
            // the "/*" sequence
            MultilineCommentStart,
            // the "//" sequence located at the beginning of the line or right after a space
            SingleLineCommentStart
        }
397     }
398 }
399