I am trying to implement Earley's algorithm for parsing a grammar, however I must have done something wrong because after the first entry in the chart it doesn't go through the rest of the input string.
My test grammar is the following:
S -> aXbX | bXaX
X -> aXbX | bXaX | epsilon
S and X are non-terminals; a and b are terminals.
The string I want to check if it is accepted or not by the grammar is: 'abba'.
Here is my code:
rules = {
"S": [
['aXbX'],
['bXaX'],
],
"X" : [
['aXbX'],
['bXaX'],
['']
]
}
def predictor(rule, state):
if rule["right"][rule["dot"]].isupper(): # NON-TERMINAL
return [{
"left": rule["right"][rule["dot"]],
"right": right,
"dot": 0,
"op": "PREDICTOR",
"completor": []
} for right in rules[rule["right"][rule["dot"]]]]
else:
return []
def scanner(rule, next_input):
# TERMINAL
if rule["right"][rule["dot"]].islower() and next_input in rules[rule["right"][rule["dot"]]]:
print('scanner')
return [{
"left": rule["right"][rule["dot"]],
"right": [next_input],
"dot": 1,
"op": "SCANNER",
"completor": []
}]
else:
return []
def completor(rule, charts):
if rule["dot"] == len(rule["right"]):
print('completor')
return list(map(
lambda filter_rule: {
"left": filter_rule["left"],
"right": filter_rule["right"],
"dot": filter_rule["dot"] + 1,
"op": "COMPLETOR",
"completor": [rule] + filter_rule["completor"]
},
filter(
lambda p_rule: p_rule["dot"] < len(p_rule["right"]) and rule["left"] == p_rule["right"][p_rule["dot"]],
charts[rule["state"]]
)
))
else:
return []
input_string = 'abba'
input_arr = [char for char in input_string] + ['']
charts = [[{
"left": "S'",
"right": ["S"],
"dot": 0,
"op": "-",
"completor": []
}]]
for curr_state in range(len(input_arr)):
curr_chart = charts[curr_state]
next_chart = []
for curr_rule in curr_chart:
if curr_rule["dot"] < len(curr_rule["right"]): # not finished
curr_chart += [i for i in predictor(curr_rule, curr_state) if i not in curr_chart]
next_chart += [i for i in scanner(curr_rule, input_arr[curr_state]) if i not in next_chart]
else:
print('else')
curr_chart += [i for i in completor(curr_rule, charts) if i not in curr_chart]
charts.append(next_chart)
def print_charts(charts, inp):
for chart_no, chart in zip(range(len(charts)), charts):
print("\t{}".format("S" + str(chart_no)))
print("\t\n".join(map(
lambda x: "\t{} --> {}, {} {}".format(
x["left"],
"".join(x["right"][:x["dot"]] + ["."] + x["right"][x["dot"]:]),
str(chart_no) + ',',
x["op"]
),
chart
)))
print()
print_charts(charts[:-1], input_arr)
And this is the output I get (for states 1 to 4 I should get 5 to 9 entries):
S0
S' --> .S, 0, -
S --> .aXbX, 0, PREDICTOR
S --> .bXaX, 0, PREDICTOR
S1
S2
S3
S4