Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 16 additions & 52 deletions GSoC25_H/IndIE/llm_extractor.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,28 @@
import json
import ollama
import time
import re
from typing import List, Dict, Any, Tuple
from dataclasses import dataclass

@dataclass
class ModelConfig:
    """Sampling settings handed to the Ollama chat call for one model.

    Every field has a default, so ``ModelConfig()`` is a usable configuration.
    """

    # Model identifier passed as ``model=`` to the chat call.
    name: str = "gemma3:12b-it-qat"
    # Forwarded as the "temperature" chat option.
    temperature: float = 0.1
    # Forwarded as the "top_p" chat option.
    top_p: float = 0.9
    # Forwarded as the "num_predict" chat option.
    num_predict: int = 2000

class LLMInterface:
"""Interface for interacting with the language model via Ollama."""
def __init__(self, model_config: ModelConfig, max_retries: int = 2, timeout: int = 60):
self.model_config = model_config
self.max_retries = max_retries
self.client = ollama.Client(timeout=timeout)

def generate_response(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Generates a response from the LLM, with retries for handling errors.
"""
retries = 0
while retries < self.max_retries:
try:
response = self.client.chat(
model=self.model_config.name,
messages=messages,
options={
"temperature": self.model_config.temperature,
"top_p": self.model_config.top_p,
"num_predict": self.model_config.num_predict
}
)
return response
import sys
import os
from typing import List, Dict

except Exception as e:
retries += 1
print(f"Error calling model '{self.model_config.name}': {e}. Retrying ({retries}/{self.max_retries})...")
time.sleep(2 ** retries)

print(f"Failed to get a valid response from model '{self.model_config.name}' after {self.max_retries} retries.")
return None
# Add parent directory to path to allow importing from src
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from src.llm_core import LLMService, ModelConfig

class LLMExtractor:
def __init__(self, model_name="gemma3:12b-it-qat", temperature=0.05, max_retries=3, timeout=120):
# USES NEW SHARED CONFIG FROM SRC
self.model_config = ModelConfig(
name=model_name,
temperature=temperature, # Lower temperature for more focused extractions
top_p=0.8, # Slightly more focused sampling
num_predict=1500 # Reduced to encourage concise outputs
)
self.llm_interface = LLMInterface(
model_config=self.model_config,
max_retries=max_retries,
timeout=timeout
temperature=temperature,
top_p=0.8,
num_predict=1500,
timeout=timeout,
max_retries=max_retries
)
# USES NEW SHARED SERVICE FROM SRC
self.llm_interface = LLMService(model_config=self.model_config)

# Quality patterns for filtering false positives
self.low_quality_patterns = [
Expand Down Expand Up @@ -941,4 +905,4 @@ def quick_test():
return result

if __name__ == "__main__":
test_llm_extractor()
test_llm_extractor()
108 changes: 35 additions & 73 deletions GSoC25_H/llm_IE/llm_interface.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import sys
import os
import requests
import time
import re
from typing import Dict, List, Any, Optional, Tuple
from typing import Dict, List, Optional
from dataclasses import dataclass, field

from config import ModelConfig
# Add parent directory to path to allow importing from src
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from src.llm_core import LLMService, ModelConfig as SharedConfig
from output_parser import OutputParser
from config import ModelConfig

@dataclass
class ExtractionResult:
Expand All @@ -18,97 +21,56 @@ class ExtractionResult:
error: Optional[str] = None

class OllamaInterface:
"""Unified interface for interacting with Ollama models"""
"""
Unified interface for interacting with Ollama models.
Refactored to use the shared src.llm_core.LLMService instead of raw requests.
"""

def __init__(self, model_config: ModelConfig, base_url: str = "http://localhost:11434"):
self.model_config = model_config
self.base_url = base_url.rstrip('/')
self.api_endpoint = f"{self.base_url}/api"
self.output_parser = OutputParser()

if not self._is_available():
print(f"Warning: Ollama model '{self.model_config.name}' not found locally. Trying to pull it...")
if not self._pull_model():
raise ConnectionError(f"Failed to pull or connect to Ollama model {self.model_config.name}")

def _is_available(self) -> bool:
"""Check if the Ollama model is available locally"""
try:
response = requests.get(f"{self.api_endpoint}/tags")
response.raise_for_status()
models = response.json().get("models", [])
return any(m['name'] == self.model_config.name for m in models)
except requests.exceptions.RequestException:
return False

def _generate_text(self, prompt: str) -> str:
"""Generic text generation using the configured Ollama model."""
start_time = time.time()

payload = {
"model": self.model_config.name,
"prompt": prompt,
"stream": False,
"options": {
"temperature": self.model_config.temperature,
"top_p": self.model_config.top_p,
"top_k": self.model_config.top_k,
"num_predict": self.model_config.max_tokens,
}
}
# ADAPTER: Convert local llm_IE config to the Shared Config
# We map 'max_tokens' (from llm_IE) to 'num_predict' (shared core)
shared_config = SharedConfig(
name=model_config.name,
temperature=model_config.temperature,
top_p=model_config.top_p,
num_predict=getattr(model_config, 'max_tokens', 2000),
timeout=getattr(model_config, 'timeout', 60),
max_retries=3
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

try:
response = requests.post(
f"{self.api_endpoint}/generate",
json=payload,
timeout=self.model_config.timeout,
headers={"Content-Type": "application/json"}
)
response.raise_for_status()

result = response.json()
return result.get("response", "").strip()

except requests.exceptions.RequestException as e:
print(f"Error during Ollama API request: {e}")
return ""
# Initialize the shared service
self.service = LLMService(shared_config)
Comment thread
mallasiddharthreddy marked this conversation as resolved.

def extract_relations(self, sentence: str, prompt: str) -> ExtractionResult:
"""Extracts relations from a sentence using a given prompt."""
"""Extracts relations from a sentence using the shared LLM service."""
start_time = time.time()

raw_output = self._generate_text(prompt)
# Prepare standard message format for the shared service
messages = [{"role": "user", "content": prompt}]

Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Use shared service (Handles retries and connection automatically)
response = self.service.generate_response(messages)
processing_time = time.time() - start_time

if not raw_output:
if not response:
return ExtractionResult(
success=False,
raw_output="",
processing_time=processing_time,
error="Failed to generate text from model."
)

# Extract text content from the Ollama response dictionary
raw_output = response.get("message", {}).get("content", "").strip()

# Parse output using existing parser
parsed_triplets, _ = self.output_parser.parse_and_format(raw_output)

return ExtractionResult(
success=len(parsed_triplets) > 0,
raw_output=raw_output,
parsed_triplets=parsed_triplets,
processing_time=processing_time
)

def _pull_model(self) -> bool:
"""Pull the model from the Ollama registry."""
print(f"Pulling model: {self.model_config.name}. This may take a while...")
try:
response = requests.post(
f"{self.api_endpoint}/pull",
json={"name": self.model_config.name, "stream": False},
timeout=300 # 5-minute timeout for pulling
)
response.raise_for_status()
print(f"Model '{self.model_config.name}' pulled successfully.")
return True
except requests.exceptions.RequestException as e:
print(f"Failed to pull model '{self.model_config.name}': {e}")
return False
)
141 changes: 141 additions & 0 deletions GSoC25_H/src/llm_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import time
import logging
import ollama
from dataclasses import dataclass
from typing import List, Dict, Any, Optional

@dataclass
class ModelConfig:
    """Single configuration object shared by all LLM callers.

    Every field has a default, so ``ModelConfig()`` is a usable configuration.
    """

    # Model identifier passed as ``model=`` to the chat call.
    name: str = "gemma3:12b-it-qat"
    # Forwarded as the "temperature" chat option.
    temperature: float = 0.1
    # Forwarded as the "top_p" chat option.
    top_p: float = 0.9
    # Forwarded as the "num_predict" chat option.
    num_predict: int = 2000
    # Passed to ``ollama.Client(timeout=...)``; presumably seconds -- TODO confirm.
    timeout: int = 60
    # Upper bound on chat attempts made by ``LLMService.generate_response``.
    max_retries: int = 3

class LLMService:
    """
    Centralized service for LLM interactions.
    Replaces duplicative logic in IndIE/llm_extractor.py and llm_IE/llm_interface.py

    On construction the service creates an ``ollama.Client`` and verifies that
    the configured model is usable, pulling it when it is not listed locally.
    """

    def __init__(self, model_config: ModelConfig):
        """
        Create the client and make sure the configured model can be used.

        Args:
            model_config: Model name, sampling options, client timeout and
                retry budget.

        Raises:
            Exception: Any error from the availability check or pull is logged
                and re-raised unchanged.
        """
        self.config = model_config
        self.client = ollama.Client(timeout=self.config.timeout)
        self.logger = logging.getLogger(self.__class__.__name__)
        self._ensure_model_available()

    def _list_model_names(self) -> Optional[List[str]]:
        """
        Return the names of the models the client lists.

        Returns:
            A list of model names, or ``None`` when the list response has a
            shape this method does not recognise.
        """
        listing = self.client.list()
        if hasattr(listing, 'models'):
            entries = listing.models
        elif isinstance(listing, dict):
            entries = listing.get('models', [])
        elif isinstance(listing, list):
            entries = listing
        else:
            return None

        names = []
        for entry in entries or []:
            # Newer ollama clients expose the tag as "model", older ones as
            # "name"; accept either so an installed model is recognised.
            if isinstance(entry, str):
                label = entry
            elif isinstance(entry, dict):
                label = entry.get('model') or entry.get('name')
            else:
                label = getattr(entry, 'model', None) or getattr(entry, 'name', None)
            if label:
                names.append(label)
        return names

    def _is_listed(self, names: List[str]) -> bool:
        """Return True when the configured model appears in *names*."""
        wanted = self.config.name
        if wanted in names:
            return True
        # An untagged name refers to the ":latest" tag in Ollama listings.
        return ':' not in wanted and f"{wanted}:latest" in names

    def _probe_model(self) -> None:
        """Issue a one-token chat request; raises if the model cannot be used."""
        self.client.chat(
            model=self.config.name,
            messages=[{'role': 'user', 'content': 'test'}],
            options={'num_predict': 1}
        )

    def _ensure_model_available(self):
        """
        Checks if model exists, attempts to pull if missing (logic from llm_IE).

        Raises:
            RuntimeError: The model is neither listed nor usable after a pull.
            Exception: Any other client error; logged with traceback and
                re-raised.
        """
        try:
            names = self._list_model_names()
            if names is None:
                # Unknown list format: fall back to asking the model directly.
                self.logger.warning("Could not parse model list response. Attempting direct model check...")
                try:
                    self._probe_model()
                except Exception:
                    # Model not available, will attempt to pull
                    names = []
                else:
                    self.logger.debug("Model '%s' is available (verified via chat)", self.config.name)
                    return

            if self._is_listed(names):
                self.logger.debug("Model '%s' is already available", self.config.name)
                return

            self.logger.info("Model '%s' not found locally. Attempting to pull...", self.config.name)
            self.client.pull(self.config.name)

            # Verify the pull by listing again; an unparseable list counts as empty.
            names_after = self._list_model_names() or []
            if self._is_listed(names_after):
                self.logger.info("Successfully pulled model '%s'", self.config.name)
                return

            # Final verification: the list may lag or use another naming scheme.
            try:
                self._probe_model()
            except Exception as verify_error:
                raise RuntimeError(
                    f"Failed to pull model '{self.config.name}'. "
                    f"Model not available after pull attempt: {verify_error}"
                ) from verify_error
            self.logger.info("Successfully pulled and verified model '%s'", self.config.name)

        except Exception as e:
            self.logger.error(
                "Model availability check/pull failed for '%s': %s",
                self.config.name, e, exc_info=True
            )
            raise

    def generate_response(self, messages: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """
        Generates a response with standardized retry logic (logic from IndIE).

        Makes at most ``config.max_retries`` chat attempts, waiting
        ``2 ** attempt`` seconds between attempts.

        Args:
            messages: Chat messages passed unchanged to ``client.chat``.

        Returns:
            The client's chat response, or ``None`` when every attempt failed
            (or ``max_retries`` is not positive).
        """
        max_retries = self.config.max_retries
        options = {
            "temperature": self.config.temperature,
            "top_p": self.config.top_p,
            "num_predict": self.config.num_predict
        }

        for attempt in range(1, max_retries + 1):
            try:
                return self.client.chat(
                    model=self.config.name,
                    messages=messages,
                    options=options
                )
            except Exception as e:
                if attempt >= max_retries:
                    # No attempt follows, so sleeping here would only delay the caller.
                    self.logger.warning(
                        "Error calling model '%s': %s. No retries left (%d/%d).",
                        self.config.name, e, attempt, max_retries
                    )
                    break
                wait_time = 2 ** attempt
                self.logger.warning(
                    "Error calling model '%s': %s. Retrying (%d/%d) in %ds...",
                    self.config.name, e, attempt, max_retries, wait_time
                )
                time.sleep(wait_time)

        self.logger.error(
            "Failed to get response from '%s' after %d retries.",
            self.config.name, max_retries
        )
        return None