-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLALR1.py
More file actions
103 lines (86 loc) · 3.92 KB
/
Copy pathLALR1.py
File metadata and controls
103 lines (86 loc) · 3.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from grammar import Grammar, Production, Terminal, eof
import LR
import LR1
from LR import dot, SHIFT, REDUCE, ACCEPT
from collections import defaultdict
# TODO: message error on conflicts
def construct_parsing_table(G: Grammar) -> LR.ParsingTable:
"""
Construct a LALR(1) parsing table for grammar G.
This function will first construct the parsing table for LR(1),
and then combine the states with common core items into one state.
The definition of class ParsingTable is as follow:
```
class ParsingTable:
states: dict[int, ItemSet]
action: dict[tuple[int, Symbol]]
goto: dict[tuple[int, Symbol]]
```
where
states: maps index of state to ItemSet.
action: maps a tuple of (state, symbol) to an action, either SHIFT, REDUCE or ACCEPT.
- if action is SHIFT, the return value is (SHIFT, state_to_shift).
- if action is REDUCE, the return value is (REDUCE, production).
- if action is ACCEPT, the return value is (ACCEPT, None).
goto: the GOTO table, maps a tuple of (state, symbol) to a state.
"""
pt = LR1.construct_parsing_table(G)
# a map from state to its core state
state_to_index: dict[LR.ItemSet, int] = {}
index_to_core: dict[int, LR.ItemSet] = {}
core_to_index: dict[LR.ItemSet, int] = {}
# records what should be in the lookahead of one core item.
# lookaheads = mapping of (core state, core item) -> lookaheads
lookaheads: dict[tuple[int, LR.Item], set[Terminal]] = defaultdict(set)
# merging all states having the same core
# kinda mess
index = 0
for state in pt.states.values():
core_state = LR.ItemSet()
item_to_lookaheads = defaultdict(set)
for item in state:
core_item = LR.Item(item.lhs, item.rhs[:], set())
item_to_lookaheads[core_item].add(item.lookahead)
core_state.add(core_item)
if core_state not in core_to_index:
core_to_index[core_state] = index
index_to_core[index] = core_state
index += 1
for core_item, symbols in item_to_lookaheads.items():
for symbol in symbols:
lookaheads[core_to_index[core_state], core_item].add(symbol)
state_to_index[state] = core_to_index[core_state]
# add the lookahead symbols to core states
for (core_index, core_item), symbols in lookaheads.items():
index_to_core[core_index].remove(core_item)
index_to_core[core_index].add(
LR.Item(core_item.lhs, core_item.rhs, symbols))
# from now on the states with same core have been merged
core_to_index = {v: k for k, v in index_to_core.items()}
# start filling in the parsing table
ACTION = {}
GOTO = {}
for state in pt.states.values():
core_index = state_to_index[state]
for item in index_to_core[core_index]:
A = LR.after_dot(item)
if isinstance(A, Terminal):
# the symbol after • is a terminal
goto_state = LR1.goto(G, state, A)
goto_core_index = state_to_index[goto_state]
ACTION[core_index, A] = (SHIFT, goto_core_index)
elif A is None and item.lhs == G.start_symbol and item.rhs[-1] == dot and eof in item.lookahead:
# acceptable state: item = [S' -> S •, $/../...]
ACTION[core_index, eof] = (ACCEPT, None)
elif A is None and item.lhs != G.start_symbol:
# • is at the end of this item, as [B -> alpha •, ...}
action = (REDUCE, Production(item.lhs, item.rhs[:-1]))
for symbol in item.lookahead:
ACTION[core_index, symbol] = action
# construct GOTO[i, n] for all non-terminals n
for n in G.non_terminals():
goto_state = LR1.goto(G, state, n)
if len(goto_state) > 0:
goto_core_index = state_to_index[goto_state]
GOTO[core_index, n] = goto_core_index
return LR.ParsingTable(index_to_core, ACTION, GOTO)