Estate-AI-Assistant/app.py at main · Cool-Engr/Estate-AI-Assistant · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
"""
AI Real Estate Assistant - Main Application (V1)

This is the main application file for the AI Real Estate Assistant project (Version 1).
It creates a Streamlit web interface where users can load property data from CSV files,
interact with an AI assistant to find properties matching their criteria, and view
conversation history.

The application uses LangChain's pandas dataframe agent to process real estate data
and respond to user queries.
"""

import pandas as pd
import streamlit as st
import io
import requests
from yarl import URL

from ai.agent import RealEstateGPT  # Import the AI agent class
from common.cfg import *  # Import configuration variables
from data.csv_loader import DataLoaderCsv  # Import CSV loading utilities

# Page-level Streamlit settings: browser-tab title and icon, plus a layout
# that spans the full width of the browser window.
_PAGE_SETTINGS = {
    'page_title': "🦾 AI Real Estate Assistant",
    'page_icon': '💬',
    'layout': 'wide',
}
st.set_page_config(**_PAGE_SETTINGS)

# Canned user prompts used for demonstration and manual testing.
# _MSG1 is a multi-line opening request; each list entry is one line of it.
_MSG1 = '\n'.join([
    'I am finding a cheap flat in Krakow.',
    'Better to have 1-3 rooms, not 1st floor, more than 20 square meters, with parking, '
    'also I would like to negotiate the final price.',
    'Please provide properties with important details in json format',
    'Do you have options for me?',
])
# Follow-up prompts that refer back to an earlier answer.
_MSG2 = (
    'Thanks. Good selection. But please find only one from those, use provided last time, '
    'which has middle price for rent, but cheapest price for media'
)
_MSG3 = 'Looks like you provided me property for Bialystok, but I need for Krakow and from the previous selection'

# Sample message keyed by conversation iteration number.
# Only iteration 0 is mapped; _MSG2 and _MSG3 are defined but deliberately not registered.
MSG_MAP = {0: _MSG1}

def load_csv_data(url: str, format_data=False):
    """
    Fetch one CSV file and return its contents as a dataframe.

    Parameters:
        url (str): Location of the CSV file; it is wrapped in a yarl ``URL``
            before being handed to ``DataLoaderCsv``
        format_data (bool): When truthy, the raw dataframe is passed through the
            loader's ``load_format_df`` step before being returned

    Returns:
        DataFrame: The raw dataframe, or the formatted one when format_data is truthy
    """
    loader = DataLoaderCsv(URL(url))
    raw_df = loader.load_df()

    # The raw frame is always loaded first; formatting is an optional second pass
    if format_data:
        return loader.load_format_df(raw_df)
    return raw_df

def load_data(urls, format_data=None, expected_rows=None):
    """
    Load every CSV in ``urls`` and merge the results into one dataframe.

    Each URL is loaded unformatted via ``load_csv_data``. If any single URL
    fails, the error is shown in the Streamlit UI and an empty dataframe is
    returned immediately (frames loaded so far are discarded).

    Parameters:
        urls (list): URLs of the CSV files to load
        format_data (bool): Whether to run ``DataLoaderCsv.format_df`` on the merged data
        expected_rows (int): Row count passed to ``DataLoaderCsv.format_df`` as ``rows_count``;
            formatting only happens when both this and format_data are truthy

    Returns:
        DataFrame: The merged (and optionally formatted) data, or an empty
        dataframe when nothing was loaded or a load failed
    """
    frames = []
    for source_url in urls:
        try:
            frames.append(load_csv_data(source_url))
        except Exception as e:
            # Any failure aborts the whole load and reports it to the user
            st.error(f"Error loading data from {source_url}: {e}")
            return pd.DataFrame()

    # Nothing to merge (e.g. no URLs were supplied)
    if not frames:
        return pd.DataFrame()

    merged = pd.concat(frames, ignore_index=True)
    print(f'Merged data rows: {len(merged)}')

    if format_data and expected_rows:
        merged = DataLoaderCsv.format_df(merged, rows_count=expected_rows)
        print(f'Concatenated data rows: {len(merged)}')

    return merged

def fix_dataframe(df):
    """
    Cast every column of a dataframe to strings, in place.

    The conversion is applied column by column to the dataframe that was
    passed in, so the caller's object is modified; the same object is also
    returned for convenience.

    Parameters:
        df (DataFrame): The pandas dataframe whose columns are converted

    Returns:
        DataFrame: The same dataframe, now holding string values in every column
    """
    for name in df.columns:
        as_text = df[name].astype(str)
        df[name] = as_text
    return df

@st.cache_data  # Repeat calls with the same dataframe reuse Streamlit's cached result
def display_filters(df_data):
    """
    Render a table listing each column of the dataframe with sample values.

    For every column, up to three of its unique values (in the order
    ``unique()`` returns them) are shown; columns with fewer unique values
    are padded with empty strings so every row has the same width.
    When the dataframe is empty a warning is shown instead.

    Parameters:
        df_data (DataFrame): The dataframe to summarise
    """
    if df_data.empty:
        st.warning("Data is empty. No filters to display.")
        return

    sample_count = 3  # Number of sample-value cells per column
    header = ["Header"] + [f"Value {i+1}" for i in range(sample_count)]

    summary_rows = []
    for name in df_data.columns:
        samples = list(df_data[name].unique()[:sample_count])
        # Right-pad short sample lists so each row matches the header length
        padding = [''] * (sample_count - len(samples))
        summary_rows.append([name, *samples, *padding])

    df_summary = pd.DataFrame(summary_rows, columns=header)

    st.write("Here are the column headers and sample values:")
    # Values are stringified before rendering as a static table
    st.table(fix_dataframe(df_summary))

def display_api_key():
    """
    Render the OpenAI API key entry widgets.

    Shows a markdown link to the Streamlit secrets-management documentation,
    a horizontal rule, a markdown link to the OpenAI API keys page, and then
    a password-style text input whose label is collapsed.

    Returns:
        str: The current value of the API key text input
    """
    # Link to the Streamlit secrets-management documentation
    secrets_doc_link = (
        'Setup [\"OPENAI\\_API\\_KEY\"]('
        'https://docs.streamlit.io/deploy/streamlit-community-cloud/deploy-your-app/secrets-management)'
    )
    st.markdown(secrets_doc_link, unsafe_allow_html=True)
    st.write('___')

    # Link to the page where OpenAI API keys are managed
    key_page_link = 'Enter [OpenAI API Key](https://platform.openai.com/account/api-keys) * optional'
    st.markdown(key_page_link, unsafe_allow_html=True)

    # Masked input; its value is handed straight back to the caller
    return st.text_input(
        'OpenAI API Key [Optional]',
        type='password',
        key='api_key_input',
        label_visibility='collapsed',
    )

def process_query(query, use_test_data):
    """
    Answer a user query and record the exchange in the conversation history.

    An empty query is ignored. Otherwise the response is either a fake text
    prefixed with ``'FAKE: '`` (when use_test_data is truthy) or the result of
    ``ask_qn`` on the agent stored in ``st.session_state['ai_agent']``.
    Responses starting with ``'GPT Error:'`` are shown as a warning and stored
    with an empty AI reply. The new exchange is inserted at the front of
    ``st.session_state['conversation_history']``.

    NOTE(review): ``fake_en`` is not defined in this file; presumably it is
    provided by the ``common.cfg`` star import — confirm.

    Parameters:
        query (str): The user's input query
        use_test_data (bool): Whether to generate a fake response instead of asking the agent
    """
    if not query:
        return

    if use_test_data:
        # Placeholder text so the UI can be exercised without calling the agent
        response = 'FAKE: ' + fake_en.text(max_nb_chars=100)
    else:
        response = st.session_state['ai_agent'].ask_qn(query)

    failed = response.startswith('GPT Error:')
    if failed:
        st.warning(response, icon='⚠')

    # Failed exchanges are still recorded, but with an empty AI reply
    ai_text = '' if failed else response
    st.session_state['conversation_history'].insert(0, {'Client': query, 'AI': ai_text})

def display_conversation():
    """
    Render every stored exchange as a pair of read-only text areas.

    Exchanges are shown in the order they are stored in
    ``st.session_state['conversation_history']``; the list is created empty
    if it does not exist yet. Widget keys are derived from the 1-based
    position of each exchange.
    """
    # Make sure the history list exists before reading it
    if 'conversation_history' not in st.session_state:
        st.session_state['conversation_history'] = []

    history = st.session_state['conversation_history']
    for position, turn in enumerate(history, start=1):
        # User message
        st.text_area("Client 🧑:", value=turn['Client'], height=100, disabled=True, key=f"client_{position}")
        # Assistant reply
        st.text_area("AI 🤖:", value=turn['AI'], height=100, disabled=True, key=f"ai_{position}")

# Seed session state with defaults; keys that already exist are left untouched.
_SESSION_DEFAULTS = {
    'conversation_history': [],  # Stored conversation exchanges
    'iteration': 0,              # Number of conversation turns so far
    'test_msg': _MSG1,           # Default sample message
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Heading displayed at the top of the page
st.title('🦾 AI Real Estate Assistant')

# Custom CSS rules for the form, API-key, button and conversation containers.
# Note: `.form-container` and `.api-key-container` each appear twice below;
# the declarations differ, so both rules apply.
_PAGE_CSS = """
    <style>
    .full-width-form {
        width: 100%;
    }
    .full-width-form .stTextArea {
        width: 100%;
    }
    .full-width-form .stButton {
        width: 100%;
    }
    .form-container {
        margin: 20px;
    }
    .api-key-container {
        margin-top: 20px;
    }
    .button-container {
        display: flex;
        align-items: center;
        gap: 10px;
        margin-bottom: 20px;
    }
    .form-container {
        flex: 1;
    }
    .api-key-container {
        flex: 1;
    }
    .conversation-container {
        max-height: 90vh; /* Adjust to fill more of the screen */
        overflow-y: auto;
    }
    </style>
"""
# Injected as raw HTML so the <style> element reaches the page
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Create a two-column layout
col1, col2 = st.columns([2, 2])  # Equal width for input and conversation columns

# Left column: data-source settings, API key entry and the query form
with col1:
    st.write("### Input and Settings")

    # Input area for multiple CSV URLs, pre-filled with the defaults from config
    urls_input = st.text_area('Enter CSV URLs (one per line)',
                              GIT_DATA_SET_URLS_STR,
                              key='csv_urls',
                              height=200)

    # One URL per non-blank line, with surrounding whitespace removed
    urls = [url.strip() for url in urls_input.split('\n') if url.strip()]

    # Button to trigger data loading
    load_data_button = st.button("Load Data")

    # Data processing options
    format_data = st.checkbox('Concatenate & And format data',
                              value=True,  # Enabled by default
                              key='format_data')

    expected_rows = st.number_input('Expected Rows Count',
                                    min_value=1,
                                    value=2000,  # Default row count
                                    step=500,
                                    key='expected_rows')

    # Handle data loading when the button is clicked
    if load_data_button and urls:
        # Load data from the URLs and store in session state
        st.session_state['df_data'] = load_data(urls, format_data, expected_rows)
        st.session_state['df_urls'] = urls  # Save URLs for reference

        # The cached agent was built from the previously loaded dataframe.
        # Drop it so the next submission builds a new agent over the new data
        # instead of silently answering from the old dataset.
        if 'ai_agent' in st.session_state:
            del st.session_state['ai_agent']

        # Display confirmation or error message
        if not st.session_state['df_data'].empty:
            st.write("Data loaded successfully.")
            st.write(f"Rows count: {len(st.session_state['df_data'])}")
        else:
            st.error("Failed to load data or the data is empty.")

    # Toggle button for API key input visibility
    if st.button("OpenAI API Key", key="api_key"):
        st.session_state.show_api_key = not st.session_state.get('show_api_key', False)

    # Display API key input field if toggled on
    if st.session_state.get('show_api_key', False):
        openai_api_key = display_api_key()
    else:
        openai_api_key = None

    # Option to use fake test responses instead of real AI
    use_test_data = st.checkbox('Use Test Responses', value=True, key='use_test_data')

    # Form holding the query text area and its submit button
    with st.form(key='full-width-form'):
        label = 'Talk to me about your dream property 😎:\n'

        # Text area for user input
        text = st.text_area(label=label, height=200)

        # Submit button to process the query
        submitted = st.form_submit_button('Submit')

        if submitted:
            # A key typed by the user takes precedence over the configured one
            key = openai_api_key or OPENAI_API_KEY

            # Validate once: a missing key (None/empty) or a wrong prefix both
            # produce a single warning instead of a crash or a duplicate message
            if not key or not key.startswith('sk-'):
                st.warning('Please enter a valid OpenAI API key starting with "sk-".', icon='⚠')
            else:
                # Get the loaded data from session state
                df_data_act = st.session_state.get('df_data')
                if df_data_act is None or df_data_act.empty:
                    st.error('Please load data first.')
                else:
                    # Build the agent once and reuse it until data is reloaded
                    if 'ai_agent' not in st.session_state:
                        st.session_state['ai_agent'] = RealEstateGPT(df_data_act, key)

                    # Process the user's query and record the exchange
                    process_query(text, use_test_data)

                    # Clear test message after first iteration and increment counter
                    if st.session_state['iteration'] == 0:
                        st.session_state['test_msg'] = ''
                    st.session_state['iteration'] += 1

# Right column: the running conversation between the user and the assistant
with col2:
    st.write("### Conversation History")

    history_panel = st.container()
    with history_panel:
        # Opening tag carrying the CSS class meant to make the history scrollable.
        # NOTE(review): each st.markdown call is emitted as its own element, so this
        # div may not actually enclose the text areas rendered below — confirm.
        st.markdown('<div class="conversation-container">', unsafe_allow_html=True)

        # Render all stored exchanges
        display_conversation()

        # Matching closing tag
        st.markdown('</div>', unsafe_allow_html=True)