Skip to content

Commit 999a0d7

Browse files
authored
Merge pull request #239 from hand-e-fr/safe_generation
Safe generation
2 parents 2eb80ad + d8fa0a1 commit 999a0d7

26 files changed

Lines changed: 1577 additions & 259 deletions

docs/doc.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ Let's **get started**! First here's the **table of contents** to help you naviga
8282
- [Make sure you have installed OpenHosta](#make-sure-you-have-installed-openhosta)
8383
- [OpenHosta Example](#openhosta-example)
8484
- [Basic Setup](#basic-setup)
85+
- [List of supported environment variables](#list-of-supported-environment-variables)
8586
- [Reasoning models](#reasoning-models)
8687
- [Changing the MetaPrompt](#changing-the-metaprompt)
8788
- [`emulate` Function](#emulate-function)
@@ -188,6 +189,18 @@ my_model = OpenAICompatibleModel(
188189
config.DefaultModel = my_model # set it as the default model for all functions using default pipeline
189190
```
190191

192+
## List of supported environment variables
193+
194+
```
195+
OPENHOSTA_DEFAULT_MODEL_API_KEY="your_api_key"
196+
OPENHOSTA_DEFAULT_MODEL_BASE_URL="https://api.openai.com/v1" # Optional
197+
OPENHOSTA_DEFAULT_MODEL_NAME="gpt-5" # Defaults to "gpt-4.1"
198+
OPENHOSTA_DEFAULT_MODEL_TEMPERATURE=0.7 # Optional
199+
OPENHOSTA_DEFAULT_MODEL_TOP_P=0.9 # Optional
200+
OPENHOSTA_DEFAULT_MODEL_MAX_TOKENS=2048 # Optional
201+
OPENHOSTA_DEFAULT_MODEL_SEED=42 # Optional. If set with a local LLM your application will be deterministic.
202+
OPENHOSTA_RATE_LIMIT_WAIT_TIME=60 # When the OpenAI API returns code 429 (RateLimitError), wait this number of seconds before retrying (0 for no retry).
203+
```
191204

192205
## Reasoning models
193206

next/Agent.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
"!pip install --upgrade -qqq uv\n",
2727
"\n",
2828
"# If you need to test a pre release uncomment:\n",
29-
"VERSION=\"@3.0.3\"\n",
29+
"VERSION=\"@3.0.4\"\n",
3030
"\n",
3131
"!uv pip install -U \\\n",
3232
" \"git+https://github.com/hand-e-fr/OpenHosta.git$VERSION\""

next/agents.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@
2727
### return emulate()
2828

2929

30+
31+
##### TODO:
32+
# AgentList object to manage multiple agents
33+
# Agent.clear() to clear memory
34+
# Agent.save() to save memory to disk
35+
# Agent.load() to load memory from disk
36+
# Agent.internal_thinking_logs = [...] to store internal thoughts
37+
# Agent.working_documents = [...] to store working documents
38+
# Agent.chat_history = [...] to store chat history with other agents or users
39+
3040
# For agents we need local scope to be passed to the LLM
3141
ret_dict = {
3242
"local_1": 42,

next/anytypes_probs.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
## L'idée est d'estimer le nombre de réponses envisagées par le LLM
2+
# On compare ce nombre lorsque l'entrée est nulle versus lorsque l'entrée est fournie.
3+
# Si le LLM envisage Rdata=10 réponses possibles pour une entrée donnée, Rnull=10e5 lorsque l'entrée est nulle,
4+
# On peut estimer qu'avec l'entrée donnée, il a décidé d'éliminer (10e5 - 10) réponses possibles.
5+
# Sa confiance peut être estimée comme le ratio entre les réponses éliminées grâce à l'entrée donnée
6+
# et les réponses envisagées avec l'entrée nulle.
7+
# soit Confiance = (Rnull - Rdata + 1) / ( Rnull )
8+
9+
# Etude aux limites:
10+
# - Si Rdata est proche de Rnull, confiance proche de 0 => le LLM n'a pas su utiliser l'entrée pour éliminer des réponses possibles
11+
# - Si Rdata est proche de 1, confiance proche de 1 => le LLM a éliminé presque toutes les réponses possibles, il est donc très confiant.
12+
13+
from OpenHosta import emulate, max_uncertainty, print_last_prompt
14+
15+
@max_uncertainty(threshold=0.9)
16+
def name_of_president(country:str, year:int)->str:
17+
"""
18+
Returns the name of the president of the given country.
19+
20+
Args:
21+
country (str): The name of the country.
22+
year (int): The year for which to find the president.
23+
24+
Returns:
25+
str: The name of the president.
26+
"""
27+
return emulate()
28+
29+
30+
answer = name_of_president("France", 2021)
31+
# This runs the function twice: once with the data, once with None
32+
# In order to approximate the uncertainty of the model, we take:
33+
# - the lower estimator when the model is given no input. (all tokens that are a prefix of the chosen one are assumed to be identical)
34+
# - the higher estimator when the model is given input. (all tokens are supposed to be different)
35+
# ! We need to decide how to separate the tokens that are assumed to be acceptable paths. We use entropy thresholding for that:
36+
# - Tokens with probability higher than (1 / number_of_possible_answers) are considered acceptable.
37+
# log(1/150000) = -11.92
38+
39+
print_last_prompt(name_of_president)
40+
41+
# This returns both estimations:
42+
# name_of_president:
43+
# - with data: "Emmanuel Macron": estimated 5 possible answers
44+
# - with None: estimated 50 possible answers
45+
# Confidence is estimated as (50 - 5 + 1) / 50 = 0.92
46+
# The model is pretty confident in its answer, as it has eliminated most of the possible answers
47+
48+
from OpenHosta import max_uncertainty, print_last_prompt, print_last_uncertainty
49+
50+
from OpenHosta import config, emulate
51+
config.DefaultModel.model_name
52+
53+
@max_uncertainty()
54+
def BestDate(assertion:str)->int:
55+
"""
56+
This function return the best year for a given assertion.
57+
"""
58+
return emulate(force_llm_args={"logprobs":True, "top_logprobs":20})
59+
60+
answer = BestDate("The fall of the Berlin Wall")
61+
# returns something like "1989"
62+
63+
print_last_uncertainty(BestDate)
64+
print_last_prompt(BestDate)
65+
66+
answer = BestDate("Emmanuel BATT's birth year")
67+
answer = BestDate("l'année ne naissance du christ")
68+
69+
from OpenHosta.core.uncertainty import last_uncertainty
70+
a = []
71+
b = []
72+
for i in range(30):
73+
#answer = BestDate("Célestine birth year")
74+
answer = BestDate("l'année ne naissance du christ")
75+
a.append(answer)
76+
b.append(last_uncertainty(BestDate))
77+
78+
import collections
79+
collections.Counter(a)
80+
81+
print(len(set(a)))
82+
print(b)
83+
84+
answer = BestDate('random')
85+
# In only one prompt 1/1 = 1
86+
87+
BestDate("le jour le plus long")
88+
print_last_uncertainty(BestDate)
89+
90+
BestDate("un siècle après -58 ")
91+
92+
def president_name(country:str, year:int)->str:
93+
"""
94+
Return the name of the president
95+
"""
96+
return emulate(force_llm_args={"logprobs":True, "top_logprobs":20})
97+
98+
99+
president_name("lichtenstein", 1956)
100+
101+
102+
def first_sentence_of(subject:str)->str:
103+
"""
104+
Return the first sentence of a writing on the subject
105+
"""
106+
return emulate(force_llm_args={"logprobs":True, "top_logprobs":20})
107+
108+
109+
first_sentence_of("la belle au bois dormant")
110+
first_sentence_of(None)
111+
112+
113+
@max_uncertainty()
114+
def question(prompt:str)->str:
115+
"""
116+
Answer to a question
117+
"""
118+
return emulate()
119+
120+
question("distance lune terre. donne juste la distance en km en notation scientifique avec 2 chiffre scinificatif.")
121+
question("distance lune terre. donne juste la distance en milliers de km.")
122+
print_last_uncertainty(question)
123+
124+
from dataclasses import dataclass
125+
126+
@dataclass
127+
class Person:
128+
name:str
129+
last_name:str
130+
131+
@max_uncertainty()
132+
def extract_name(snippet:str) -> Person:
133+
"""
134+
Identify the person in a snippet of text.
135+
136+
Return:
137+
a Person object (str are in between ')
138+
"""
139+
return emulate()
140+
141+
extract_name("le gagnant des élections de 1986 est Jean-Edouard. il est le fils de Charles Dupond et a conservé le nom de famille")
142+
print_last_uncertainty(extract_name)
143+
144+
import math
145+
i=1
146+
i+=1
147+
[(x["token"], math.exp(x["logprob"])) for x in sorted(extract_name.hosta_inspection["logs"]["llm_api_response"]["choices"][0]["logprobs"]["content"][i]["top_logprobs"], key=lambda x: -x["logprob"])[:4]]

0 commit comments

Comments
 (0)