Skip to content

Commit fdd7093

Browse files
authored
Merge pull request #588 from Ojash927/graphing
Added sortedPatternFrequencyGraph.py
2 parents 303d686 + ad8cc87 commit fdd7093

1 file changed

Lines changed: 142 additions & 0 deletions

File tree

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# sortedPatternFrequencyGraph
2+
#
3+
# Generates a Sorted Pattern Frequency graph
4+
# showing how often each mined frequent pattern occurs in the dataset.
5+
#
6+
# X-axis: Patterns sorted by frequency (most frequent → least frequent)
7+
# Y-axis: Support count of each pattern
8+
#
9+
# The graph visualizes the "long tail" distribution among discovered patterns.
10+
# It also supports plotting up to five minimum support thresholds for comparison.
11+
#
12+
# Usage Example:
13+
# from PAMI.extras.graph import sortedPatternFrequencyGraph as spfg
14+
# spfg.generateSortedPatternFrequencyGraph("Transactional_T10I4D100K.csv", "\t", "Apriori", [100, 200, 300])
15+
#
16+
17+
__copyright__ = """
18+
Copyright (C) 2021 Rage Uday Kiran
19+
20+
This program is free software: you can redistribute it and/or modify
21+
it under the terms of the GNU General Public License as published by
22+
the Free Software Foundation, either version 3 of the License, or
23+
(at your option) any later version.
24+
25+
This program is distributed in the hope that it will be useful,
26+
but WITHOUT ANY WARRANTY; without even the implied warranty of
27+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28+
GNU General Public License for more details.
29+
30+
You should have received a copy of the GNU General Public License
31+
along with this program. If not, see <https://www.gnu.org/licenses/>.
32+
"""
33+
34+
import importlib
35+
import plotly.graph_objects as go
36+
37+
def generateSortedPatternFrequencyGraph(inputFile, sep="\t", algorithm="Apriori", minSupList=None):
    """
    Mine frequent patterns at one or more minimum supports and plot their supports by rank.

    For every minimum support value, the chosen algorithm class is instantiated as
    ``AlgoClass(inputFile, minSup, sep)``, ``mine()`` is called, and the mapping returned
    by ``getPatterns()`` is sorted by value in descending order. Each successful run
    becomes one line+marker trace in a single Plotly figure (x = rank starting at 1,
    y = support), which is displayed with ``fig.show()``.

    :param inputFile: input dataset handed unchanged to the algorithm class.
    :param sep: separator handed unchanged to the algorithm class.
    :param algorithm: name of both the module under ``PAMI.frequentPattern.basic`` and the
        class inside that module.
    :param minSupList: iterable of minimum support values (only the first five are used).
        A single scalar (int, float or str) is accepted and treated as a one-element list.
        Defaults to ``[100]``.
    :return: None. Progress, warnings and errors are reported with ``print``.
    """
    print(f"Generating Graph using {algorithm}...")

    if minSupList is None:
        minSupList = [100]
    elif isinstance(minSupList, (int, float, str)):
        # A bare threshold would otherwise crash on len() (numbers) or be iterated
        # character by character (strings).
        minSupList = [minSupList]
    else:
        # Materialize so that len() and slicing work for any iterable.
        minSupList = list(minSupList)

    if len(minSupList) > 5:
        print("Warning: Only up to 5 minimum supports are supported; truncating list.")
        minSupList = minSupList[:5]

    # Import chosen algorithm dynamically
    try:
        module = importlib.import_module(f"PAMI.frequentPattern.basic.{algorithm}")
        AlgoClass = getattr(module, algorithm)
    except Exception as e:
        print("Error importing algorithm:", e)
        return

    # Prepare Plotly figure
    fig = go.Figure()

    # Use different colors for multiple minsups
    colors = ["blue", "red", "green", "orange", "yellow"]

    tracesAdded = 0
    for idx, minSup in enumerate(minSupList):
        # Best-effort by design: a failure for one threshold must not prevent the
        # remaining thresholds from being mined and plotted.
        try:
            print(f"Mining frequent patterns with minSup = {minSup}...")
            obj = AlgoClass(inputFile, minSup, sep)
            obj.mine()
            patterns = obj.getPatterns()
        except Exception as e:
            print(f"Error running algorithm for minSup={minSup}:", e)
            continue

        if not patterns:
            print(f"No frequent patterns found for minSup={minSup}.")
            continue

        # Sort patterns by support count (descending order)
        sortedPatterns = sorted(patterns.items(), key=lambda item: item[1], reverse=True)
        patternLabels = [_formatPatternLabel(pattern) for pattern, _ in sortedPatterns]
        supports = [support for _, support in sortedPatterns]
        traceColor = colors[idx % len(colors)]

        fig.add_trace(go.Scatter(
            x=list(range(1, len(supports) + 1)),
            y=supports,
            mode="lines+markers",
            line=dict(color=traceColor, width=2),
            marker=dict(size=5, color=traceColor),
            name=f"minSup = {minSup}",
            hovertemplate=(
                "<b>Rank:</b> %{x}<br>"
                f"<b>MinSup:</b> {minSup}<br>"
                "<b>Pattern:</b> %{text}<br>"
                "<b>Support Count:</b> %{y}<extra></extra>"
            ),
            text=patternLabels
        ))
        tracesAdded += 1

    if tracesAdded == 0:
        # Nothing was mined successfully; an empty figure would only be confusing.
        print("No patterns to plot; graph was not generated.")
        return

    # Both axes share the same boxed, outside-tick styling.
    axisStyle = dict(
        showspikes=True,
        showline=True,
        mirror=True,
        linewidth=2,
        linecolor="black",
        ticks="outside",
        tickwidth=1.5,
        tickcolor="black",
        tickfont=dict(size=12, color="black")
    )

    fig.update_layout(
        title=f"Sorted Pattern Frequency Graph ({algorithm})",
        xaxis=dict(title="Pattern Rank (sorted by support count)", **axisStyle),
        yaxis=dict(title="Support Count", **axisStyle),
        hovermode="closest",
        hoverlabel=dict(bgcolor="white", font=dict(size=12, color="black")),
        legend=dict(
            x=1.05, y=1,
            bgcolor="rgba(255,255,255,0.8)",
            bordercolor="gray",
            title="Minimum Supports"
        ),
        margin=dict(r=160, t=60, l=70, b=60),
        template="plotly_white",
        font=dict(size=13)
    )

    fig.show()


def _formatPatternLabel(pattern):
    """Return the hover-label text for one pattern key of the mined-patterns mapping."""
    if isinstance(pattern, (set, frozenset)):
        # Set iteration order is arbitrary; sort the rendered items for a stable label.
        return "{" + ", ".join(sorted(str(item) for item in pattern)) + "}"
    if isinstance(pattern, (list, tuple)):
        # str() each item so non-string items (e.g. ints) do not break the join.
        return "{" + ", ".join(str(item) for item in pattern) + "}"
    return str(pattern)
139+
140+
if __name__ == "__main__":
    # Demo run when executed as a script: mine the sample dataset with Apriori at
    # four minimum supports and display the resulting graph.
    inputFile = "Transactional_T10I4D100K.csv"
    generateSortedPatternFrequencyGraph(
        inputFile=inputFile,
        sep="\t",
        algorithm="Apriori",
        minSupList=[1000, 2000, 3000, 4000],
    )

0 commit comments

Comments
 (0)