# The idea is to estimate the number of answers the LLM is considering.
# We compare that number when the input is null versus when the input is provided.
# If the LLM considers Rdata=10 possible answers for a given input and Rnull=10e5 when the input is null,
# we can estimate that, given the input, it decided to eliminate (10e5 - 10) possible answers.
# Its confidence can be estimated as the share of the null-input answers that were eliminated
# once the input was provided (the +1 makes the ratio exactly 1 when Rdata = 1),
# i.e. Confidence = (Rnull - Rdata + 1) / Rnull

# Limit cases:
# - If Rdata is close to Rnull, confidence is close to 0 => the LLM could not use the input to eliminate possible answers.
# - If Rdata is close to 1, confidence is close to 1 => the LLM eliminated almost every possible answer, so it is very confident.
12+
13+ from OpenHosta import emulate , max_uncertainty , print_last_prompt
14+
# Demo function whose body is delegated to the LLM through OpenHosta's emulate().
# NOTE(review): emulate() is understood to build its prompt from this function's
# signature and docstring, so both are kept exactly as the author wrote them --
# confirm against the OpenHosta documentation before rewording either.
# NOTE(review): threshold=0.9 is forwarded to max_uncertainty as-is; what the
# decorator does when the estimate crosses it is not visible in this file.
@max_uncertainty(threshold=0.9)
def name_of_president(country: str, year: int) -> str:
    """
    Returns the name of the president of the given country.

    Args:
        country (str): The name of the country.
        year (int): The year for which to find the president.

    Returns:
        str: The name of the president.
    """
    return emulate()
28+
29+
answer = name_of_president("France", 2021)
# This runs the function twice: once with the data, once with None.
# In order to approximate the uncertainty of the model, we take:
# - the lower estimator when the model is given no input
#   (all tokens that are the start of the chosen one are assumed to be identical);
# - the higher estimator when the model is given input
#   (all tokens are assumed to be different).
# ! We need to decide how to separate the tokens that are assumed to be on an
#   acceptable path. We use entropy thresholding for that:
# - Tokens with probability higher than (1 / number_of_possible_answers) are considered acceptable.
#   log(1/150000) = -11.92

print_last_prompt(name_of_president)
40+
# This returns both estimations:
# name_of_president:
# - with data: "Emmanuel Macron": estimated 5 possible answers
# - with None: estimated 50 possible answers
# Confidence is estimated as (50 - 5 + 1) / 50 = 0.92
# The model is pretty confident in its answer, as it has eliminated most of the possible answers
47+
from OpenHosta import max_uncertainty, print_last_prompt, print_last_uncertainty

from OpenHosta import config, emulate

# Bare expression: the value is only echoed when this runs as a REPL/notebook
# cell; as a plain script statement it merely evaluates the attribute.
config.DefaultModel.model_name
52+
# Demo function answered by the LLM through emulate(); max_uncertainty() is
# applied with its default arguments.
# force_llm_args passes {"logprobs": True, "top_logprobs": 20} along with the call
# (presumably forwarded to the LLM API so that per-token log-probabilities and the
# 20 most likely alternatives are returned -- confirm against the OpenHosta docs).
# NOTE(review): the signature and docstring are understood to be used by emulate()
# as the prompt, so they are kept verbatim, including the docstring's wording.
@max_uncertainty()
def BestDate(assertion: str) -> int:
    """
    This function return the best year for a given assertion.
    """
    return emulate(force_llm_args={"logprobs": True, "top_logprobs": 20})
59+
answer = BestDate("The fall of the Berlin Wall")
# returns something like "1989"

# Show the uncertainty estimate and the prompt recorded for the call above.
print_last_uncertainty(BestDate)
print_last_prompt(BestDate)

# Further prompts; each call overwrites `answer`. The prompt strings are LLM
# inputs and are kept exactly as written.
answer = BestDate("Emmanuel BATT's birth year")
answer = BestDate("l'année ne naissance du christ")
68+
import collections

from OpenHosta.core.uncertainty import last_uncertainty

# Ask the same question 30 times, collecting each answer in `a` and, in `b`,
# whatever last_uncertainty(BestDate) returns right after that call.
a = []
b = []
for _run in range(30):
    # Alternative prompt tried by the author: BestDate("Célestine birth year")
    answer = BestDate("l'année ne naissance du christ")
    a.append(answer)
    b.append(last_uncertainty(BestDate))

# Bare expression: the frequency table is only echoed in a REPL/notebook cell.
collections.Counter(a)

print(len(set(a)))  # number of distinct answers among the 30 runs
print(b)
83+
answer = BestDate('random')
# In only one prompt 1/1 = 1
# NOTE(review): author's note above kept verbatim; its exact meaning is unclear.

BestDate("le jour le plus long")
print_last_uncertainty(BestDate)

# The trailing space inside the prompt string is in the original and is preserved.
BestDate("un siècle après -58 ")
91+
# Undecorated variant (no max_uncertainty) that still passes
# {"logprobs": True, "top_logprobs": 20} through force_llm_args.
# NOTE(review): the signature and docstring are understood to be used by emulate()
# as the prompt, so they are kept verbatim.
def president_name(country: str, year: int) -> str:
    """
    Return the name of the president
    """
    return emulate(force_llm_args={"logprobs": True, "top_logprobs": 20})
97+
98+
# Result is not stored; it is only echoed when run as a REPL/notebook cell.
president_name("lichtenstein", 1956)
100+
101+
# Free-text demo function answered by the LLM through emulate(), passing
# {"logprobs": True, "top_logprobs": 20} through force_llm_args.
# NOTE(review): the signature and docstring are understood to be used by emulate()
# as the prompt, so they are kept verbatim.
def first_sentence_of(subject: str) -> str:
    """
    Return the first sentence of a writing on the subject
    """
    return emulate(force_llm_args={"logprobs": True, "top_logprobs": 20})
107+
108+
# Same function called with a real subject and then with None. The None call
# deliberately ignores the `str` annotation (presumably the "null input"
# baseline described in the file header -- confirm with the author).
first_sentence_of("la belle au bois dormant")
first_sentence_of(None)
111+
112+
# Generic question/answer demo function; max_uncertainty() is applied with its
# default arguments and emulate() is called without extra LLM arguments.
# NOTE(review): the signature and docstring are understood to be used by emulate()
# as the prompt, so they are kept verbatim.
@max_uncertainty()
def question(prompt: str) -> str:
    """
    Answer to a question
    """
    return emulate()
119+
# Two phrasings of the same question (Earth-Moon distance) that differ in the
# requested output format. The prompt strings are LLM inputs and are kept
# exactly as written.
question("distance lune terre. donne juste la distance en km en notation scientifique avec 2 chiffre scinificatif.")
question("distance lune terre. donne juste la distance en milliers de km.")
# Reports the estimate for the most recent call, i.e. the second phrasing.
print_last_uncertainty(question)
123+
124+ from dataclasses import dataclass
125+
# Structured return type used by extract_name: a person's two name parts.
# Documented with comments rather than a class docstring so that nothing is
# added to what a library inspecting the class might read.
@dataclass
class Person:
    name: str  # first field; the person's given name
    last_name: str  # second field; the person's family name
130+
# Structured-output demo: the LLM is asked to return a Person dataclass;
# max_uncertainty() is applied with its default arguments.
# NOTE(review): the signature and docstring are understood to be used by emulate()
# as the prompt, so they are kept verbatim.
@max_uncertainty()
def extract_name(snippet: str) -> Person:
    """
    Identify the person in a snippet of text.

    Return:
        a Person object (str are in between ')
    """
    return emulate()
140+
# The snippet gives the first name directly and the family name only through
# the father's name. The text is an LLM input and is kept exactly as written.
extract_name("le gagnant des élections de 1986 est Jean-Edouard. il est le fils de Charles Dupond et a conservé le nom de famille")
print_last_uncertainty(extract_name)
143+
import math

# Index into the per-token "content" list of the recorded response; set to 1 and
# then bumped, so the expression below inspects position 2.
i = 1
i += 1

# Bare expression (echoed only in a REPL/notebook cell): the four entries of
# top_logprobs at position i with the highest logprob, as (token, exp(logprob))
# pairs, i.e. log-probabilities converted back to probabilities.
[
    (x["token"], math.exp(x["logprob"]))
    for x in sorted(
        extract_name.hosta_inspection["logs"]["llm_api_response"]["choices"][0]["logprobs"]["content"][i]["top_logprobs"],
        key=lambda x: -x["logprob"],
    )[:4]
]