From 22583e68944d3f79c2be3930409e6f529d475549 Mon Sep 17 00:00:00 2001
From: Yohan Simard <ysimard@etud.insa-toulouse.fr>
Date: Thu, 18 Mar 2021 15:04:32 +0100
Subject: [PATCH] Parse input with a space between each terminal + other
 improvements

---
 main.py    | 61 +++++++++++++++++++++++++++++++++---------------------
 sample.txt |  7 ++++---
 2 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/main.py b/main.py
index c439082..198b495 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,6 @@
-
 rules_dict = {}
 terminals_set = set()
 
-
 def parse(lines):
     lines = [line.strip() for line in lines]
     lines = [line.split(":") for line in lines]
@@ -20,6 +18,7 @@ def parse(lines):
     print_includes()
     print_util()
     print_declarations()
+    print("// ---- Functions to parse terminals ----")
     print_terminal_rules()
     print_rules(rules)
     print_main(rules)
@@ -31,19 +30,18 @@ def parse(lines):
 def print_declarations():
     for key in rules_dict:
         print(f"int parse_{key}(char* word, int pos);")
-
+    print()
 
 def print_includes():
-    print("""
+    print("""\
 #include <stdio.h>
 #include <string.h>
 """)
 
 
 def print_util():
-    print("""
-void print_with_indent(int indent, char * string)
-{
+    print("""\
+void print_with_indent(int indent, char * string) {
     printf("%*s%s", indent, "", string);
 }
 """)
@@ -59,7 +57,7 @@ void print_with_indent(int indent, char * string)
 
 def print_main(rules):
     axiom = get_code(rules[0][0])
-    print(f"""
+    print(f"""\
 int main(int argc, char* argv[]) {{
     char* word;
     if (argc >= 2)
@@ -68,7 +66,7 @@ int main(int argc, char* argv[]) {{
         word = "";
     int value = parse_{axiom}(word, 0);
     printf("%d\\n", value);
-    if (value == strlen(word)) {{
+    if (value == strlen(word) + 1) {{
         printf("OK\\n");
     }} else {{
         printf("KO\\n");
@@ -78,7 +76,9 @@ int main(int argc, char* argv[]) {{
 
 
 def print_rules(rules):
+    print("// ---- Functions to parse a non-terminal according to a rule ----")
     print_unit_rules(rules)
+    print("// ---- Functions to parse a non-terminal by testing all rules ----")
     print_global_rules()
 
 
@@ -88,33 +88,36 @@ def print_unit_rules(rules):
         left = rule[0]
         rules_dict[left] += 1
         code = get_code(left)
-        print(f"""int parse_{code}{rules_dict[left]}(char* word, int pos) {{
+        print(f"""\
+int parse_{code}{rules_dict[left]}(char* word, int pos) {{
     int totalCharParsed = 0;
-    int nbCharParsed = 0;
-    printf("Entering {code}{rules_dict[left]}\\n");
-    """)
+    int nbCharParsed;
+    printf("Entering {code}{rules_dict[left]}\\n");""")
         for element in rule[1]:
             elem_code = get_code(element)
             if elem_code != "":
-                print(f"""    nbCharParsed = parse_{elem_code}(word, pos + totalCharParsed);
+                print(f"""
+    nbCharParsed = parse_{elem_code}(word, pos + totalCharParsed);
     if (nbCharParsed == -1) {{
         printf("Fail {elem_code} in {code}{rules_dict[left]}\\n");
         return -1;    
     }}
     totalCharParsed += nbCharParsed;""")
             else:
-                print(f"    printf(\"Epsilon! -> Success\\n\");")
+                print(f"\n    printf(\"Epsilon! -> Success\\n\");")
         print(f"""
     printf("Success {code}{rules_dict[left]}\\n");
     return totalCharParsed;
-}}""")
+}}
+""")
 
 
 def print_global_rules():
     for (key, value) in rules_dict.items():
         code = get_code(key)
-        print(f"""int parse_{code}(char* word, int pos) {{
-    int nbCharParsed = 0;
+        print(f"""\
+int parse_{code}(char* word, int pos) {{
+    int nbCharParsed;
     printf("Entering {key}\\n");""")
         for i in range(1, value + 1):
             print(f"""
@@ -124,7 +127,8 @@ def print_global_rules():
     }}""")
         print("""
     return -1;
-}""")
+}
+""")
 
 
 def get_code(s):
@@ -133,19 +137,28 @@ def get_code(s):
 
 def print_terminal_rules():
     for t in terminals_set:
+        # Escape every \ and " (backslashes first) so any terminal, not just a lone quote, is a valid C string literal
+        safeT = t.replace("\\", "\\\\").replace('"', '\\"')
+
         code = get_code(t)
         l = len(t)
-        print(f"""int parse_{code}(char* word, int pos) {{
+        print(f"""\
+int parse_{code}(char* word, int pos) {{
+    // Extract the next {l} chars of the word
     char substr[{l+1}];
     substr[0] = '\\0';
     strncat(substr, &word[pos], {l});
-    if (strcmp(substr, "{t}") == 0) {{
-        print_with_indent(pos, "{code}\\n");
-        return {l};
+    
+    // Compare this extracted string to the terminal, 
+    // and check if the next char is a space or the end of the string
+    if (strcmp(substr, "{safeT}") == 0 && (word[pos+{l}] == ' ' || word[pos+{l}] == '\\0')) {{
+        print_with_indent(pos, "{safeT}\\n");
+        return {l+1};
     }} else {{
         return -1;
     }}
-}}""")
+}}
+""")
 
 
 if __name__ == '__main__':
diff --git a/sample.txt b/sample.txt
index 7b2fb0c..8191ca8 100644
--- a/sample.txt
+++ b/sample.txt
@@ -1,8 +1,9 @@
+S : a S b
 S : int
-S : string
+S : " string "
 S : { Assoc }
 Assoc : KeyVal AssocBis
-Assoc :
+Assoc : 
 AssocBis : , KeyVal AssocBis
 AssocBis :
-KeyVal : id = S
+KeyVal : id = S
\ No newline at end of file