From f607f7662dcbc7bd76f25fd8ec1d66e931dc7f07 Mon Sep 17 00:00:00 2001 From: souzatharsis Date: Mon, 25 Nov 2024 19:27:19 -0300 Subject: [PATCH] update outlines --- .../_build/.doctrees/environment.pickle | Bin 742286 -> 744254 bytes .../notebooks/structured_output.doctree | Bin 81055 -> 82039 bytes .../notebooks/structured_output.ipynb | 2 +- .../html/notebooks/structured_output.html | 2 +- tamingllms/_build/html/searchindex.js | 2 +- .../jupyter_execute/markdown/intro.ipynb | 2 +- .../notebooks/structured_output.ipynb | 2 +- tamingllms/notebooks/structured_output.ipynb | 2 +- 8 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tamingllms/_build/.doctrees/environment.pickle b/tamingllms/_build/.doctrees/environment.pickle index c46a6202dc20f5dd391169e50ed0d826afd3f003..927a40f6d5b5c8fd15b572c55f6cdb063a9d5ce2 100644 GIT binary patch delta 3569 zcmeH}du$X%7{G7u$<17Q*8+vg!?i-d+K071qZTa%T6vQaH3f8gx4qlt*ll+ATB;zm zMzEn!^$H_Ife>GzwzTpXp%F#!fvFPkjc7ncFu^AfQ9|^az46o-|1icsG~6He{l0I$ znfYeEnQv$1(}laX7xt7?TpU;+8S^80qgHI`;f}VTrdx8YuFp4%JtU<+grHGm^Ek%nmFCtC!=92n6SI!bK zu16Ki6btR%hSVsUVcme9b%wscw$&+`6c^)C6U?C3*(Oa+7+}+6i;WK^$r2?)7B$^i zAjRWNVp!K2WW$mpB5aW^TJl0mwDkG1W)_L%P0VQMkwjS0mPExc^sGzRy$q?2^|3<| zYZb|q&EKshENNmip+samh6Q>=j>Dj0J#>fA!Z4qtXnYz`hxtVv)@r~Ia$TJqwv+}r z6`E-o;K}wvSprMf6IOi!B#b4o(IPS|8Nx7Si!B&(uS7(>-cocHesxSS!Aoy|Kwy`^ zvoOkrgATB2SXVqXH%o>c;}}zn%9?CQYyj3XESoS+6@(g-f|l4QHT||o@@*v=jbpyK z9kHC->;74O=Z$g=y^!C3J-?qeulef?XR)jCrwkWRd8(^7JGiQ6oWuU55>6!vl?ZgI z3Q9W=;E*|er@o&($bEUTbsM&y3%E5*ShH0wQm1JXULt5SC!Ihj=E2E zk@chfmE_4DG?KQrdmbDh1nAI5Jx2zD{IS#XOeV;UJ3Sk+K#uSCoCts{_|TJsnf|$F zFamkU*PgNrkdL18jQ4>&?DUQUNrofuHa9D5$^5l#&=+sW1IZVi;yI=KnD3 zzg9A`yb=XG7Yf;b;zRW}3mJt^RqfottS+gwP!{HPsI4IE>O`YYp=P%bA}2PX&!C3% zccMcAl!blWNFkwa^gMa03#sJF&FER@Wj=CbGa5o#deEgrKkgnxih58rDSr;s$J#xk z$jaA{m+bFBpThtdy(o{2=r3ro#9m%%A5fyr4G%uEV{yk=HAIRJp=-%qThM0m;z5)}#jR+fz%ZLG z+=d?D7^G-1$92!Av2Z7<=hn@ANa2|OLfT=+E+lbl!Aocv$5(cvdpXYDgIYL#xff+S zNrfNl>0Ovc>a%bb{pvMzmB28YmhMAKIPQG|JEQKY1l9f8MLR|H5~uUasNnsp5rGXj&q!ohgS$A*ioMx zu+RBqY#;K;A^U`bF-X0m@G%#Bo==X!H(bu{!b2*}aX8xHn5VQIf6)4V)yS-|xEK=M zG8Xry=76isdVpXT2?V=EAcAB@KAw`x!f3TUT5X3`yS)`c*p`py3POkuEWjGO|1E`h zhTpz|xmUsAnYmP$K*S>0y$~^qaCNrbbgabTSlXcu2M;xqegMtvFy)uyi$bew!1fsX zTf_d6w7MKmXA?Xz9`9s_^UewQWj8!E@@~cZ1air(c<}Zqc#+__DOMJ{f!;S2XLx@B D?^f8o delta 1727 zcmZXUYfMx}7>0Lt_w42Smc3EdfEpER3tB}`F;>J_Q9zVRr9T?P#tRjWbl0$0t5zX3 zmTp8vcX5hPu&HUA8bNDezzu4(vGEdxrlygIm+BgRG^s^Q+oaacoEP zxxBNdyQc2_eCqM^9Qv!>G&Z^3;q_IQt?~Ib_|wYXSzYN5)!W}*UA1AIZ?iY8!dqSC z3og$YA7_hs)xNbSKh6QeBD+zi%Uy7zf6fDzeO-0Ns-BukH7-`2``xQLV%3F9XH{FQ zx>|WZ_;c-!TeZCONDt283bP9*`>yH?*eq_5vbS1&ab z8^-5XQ&*8;lBL{S8Y=hv!)Dwbbep0+b5%e(VbjVCe-LY9ZI({^u@~1H4$XGV&YTgF z(~aq~Gwb?V4Xm%#XkDzg8)^Tih8fHrF+Ms*oV3Ve%4PXMmZIp_%K~CewVbbR^Ey$;&V8rX_@z zj~%9V;;cL3G_fO2^PEZ~gC>=(0kv{nveoU0Dv9r3vC3)HVE0N<(UZOT9P^HmV(Jjl@nr z`U>vxWxjV31_*t(3sicx6S|3wZRvspqQ7UN4Gc!{fxX?3OiRO%MDH^w- z!Ea!rbl*G=6%q%&l>-u3&R-*rVI3UsU12juKcO3!}SN;gwqDVPnaHQ7FlpCPR?%Dx3Byq}3 z*dy`cAk;{F<2J7 z;tm`7qeyL&(Zdq$_?qn6?!e829=!^_Xi|2&u#ox&K(1}rh4YE1-E3hz@_W{B;5o4( z4NE+jE%E3m@j=baNW>zEjY-%jF>8$2lQ!VySbQk));KJWBIWT=Sm^{@DZ4Hw<9>mUSQJ#UVvJ<3!GZCxoKlvQ!rxS;b}?3xl_a=Xi%9) zr`s1w(PlKLJ^!l0&nl{)j%oawGp6H-=r{Od7W27?7sN$8A1=0o8Mri2tRkQd2x$EQ ztvkS1P?mwEgxJ~n8R#Wu?af8fsq>oo6-Ql29W(RULrk1W?@YDi(!N>zAni0c6N}w) zv`+WIvytq>Oe~5g4myyJNzRrMK07FyCea*Z#wGZw_*iiPo)Rf_EW>j~+Pnf2*n=YM YB>ZotWd$}6CA)lX`8<~XI>wp)22&O<;Q#;t diff --git a/tamingllms/_build/.doctrees/notebooks/structured_output.doctree b/tamingllms/_build/.doctrees/notebooks/structured_output.doctree index e7631122131fa16629a4b25379c512912fb1c916..1e6cc5689ff0404e9e7259593ba3e6b68ebe3856 100644 GIT binary patch delta 1048 zcmeHGy>1gh5O&}!WKl#=M08Appdc2=Mbd$&prQ#<8j8K!xw|Fn9cy>i<|_LE(m5%h z$twg|h!WB922?Z@NXZKzBnoENNI*dc9oL*Rn(uGEyOXcQmqou=^p97sid$#>{qjPu zw;Z1BJ$&Lcdf*nJCDOOyab#;82AGjQ2vByQ^s^Y)xF&B+Je8_6)^G>f1oozkY{)I| zzulUHrN(f8?Y!#M3_?X-$ZAjHJgbdU7Od(-l{Gdktb2!!le?MMu{vo0&)sG zRn};rNjjO>-)l{fEt-lExsq&dH;GOFl3H@vwRY+!d%PK-L5E%?f`qIv2+t$2+Nw>4 znd*LqTn^4}Ty8HtJ!-f1>%U(3@ZGilh5z%yKP_JTx$-ONU-_-_)!_W&)xq)UymR delta 95 zcmey~z&igY3rhp*RPK!|8ZVi8swW%13}IIAESdc9ttv+bd+d}9F@)gehi~r+Z9UFt kz|1+d!G-}0wtr`4oMp~$Ze~0Ms+`5p+-&;-YsTv=0MjHQ00000 diff --git a/tamingllms/_build/html/_sources/notebooks/structured_output.ipynb b/tamingllms/_build/html/_sources/notebooks/structured_output.ipynb index 8e4c876..70c8ef2 100644 --- a/tamingllms/_build/html/_sources/notebooks/structured_output.ipynb +++ b/tamingllms/_build/html/_sources/notebooks/structured_output.ipynb @@ -587,7 +587,7 @@ "source": [ "### Outlines\n", "\n", - "Outlines is a library specifically focused on structured text generation from LLMs. It provides several powerful features:\n", + "Outlines is a library specifically focused on structured text generation from LLMs. Under the hood, Outlines works by adjusting the probability distribution of the model's output logits - the raw scores from the final layer of the neural network that are normally converted into text tokens. By introducing carefully crafted logit biases, Outlines can guide the model to prefer certain tokens over others, effectively constraining its outputs to a predefined set of valid options. This provides fine-grained control over the model's generation process. In that way, Outlines provides several powerful features:\n", "\n", "* **Multiple Choice Generation**: Restrict the LLM output to a predefined set of options.\n", "* **Regex-based structured generation**: Guide the generation process using regular expressions.\n", diff --git a/tamingllms/_build/html/notebooks/structured_output.html b/tamingllms/_build/html/notebooks/structured_output.html index f8685a1..4133ea4 100644 --- a/tamingllms/_build/html/notebooks/structured_output.html +++ b/tamingllms/_build/html/notebooks/structured_output.html @@ -626,7 +626,7 @@

3.3.4. Outlines

-

Outlines is a library specifically focused on structured text generation from LLMs. It provides several powerful features:

+

Outlines is a library specifically focused on structured text generation from LLMs. Under the hood, Outlines works by adjusting the probability distribution of the model’s output logits - the raw scores from the final layer of the neural network that are normally converted into text tokens. By introducing carefully crafted logit biases, Outlines can guide the model to prefer certain tokens over others, effectively constraining its outputs to a predefined set of valid options. This provides fine-grained control over the model’s generation process. In that way, Outlines provides several powerful features:

  • Multiple Choice Generation: Restrict the LLM output to a predefined set of options.

  • Regex-based structured generation: Guide the generation process using regular expressions.

  • diff --git a/tamingllms/_build/html/searchindex.js b/tamingllms/_build/html/searchindex.js index 5aee8e1..9b0c059 100644 --- a/tamingllms/_build/html/searchindex.js +++ b/tamingllms/_build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["markdown/intro", "markdown/toc", "notebooks/evals", "notebooks/output_size_limit", "notebooks/structured_output"], "filenames": ["markdown/intro.md", "markdown/toc.md", "notebooks/evals.ipynb", "notebooks/output_size_limit.ipynb", "notebooks/structured_output.ipynb"], "titles": ["1. Introduction", "Taming Large Language Models", "4. Challenges of Evaluating LLM-based Applications", "2. Output Size Limitations", "3. Wrestling with Structured Output"], "terms": {"am": 0, "alwai": [0, 2, 4], "do": [0, 2, 3, 4], "which": [0, 2, 3, 4], "cannot": [0, 2], "order": [0, 2, 4], "mai": [0, 2, 3, 4], "learn": [0, 2, 4], "how": [0, 2, 3, 4], "pablo": 0, "picasso": 0, "In": [0, 2, 3, 4], "recent": [0, 4], "year": [0, 2, 3, 4], "larg": [0, 2, 3, 4], "languag": [0, 2, 3, 4], "model": [0, 2], "llm": [0, 1, 3, 4], "have": [0, 2, 3, 4], "emerg": 0, "transform": [0, 4], "forc": [0, 2], "technologi": [0, 2, 3, 4], "promis": 0, "revolution": 0, "build": [0, 1, 2, 3], "product": [0, 2, 4], "interact": [0, 3, 4], "comput": [0, 2, 3], "from": [0, 2, 3, 4], "chatgpt": [0, 2], "github": 0, "copilot": 0, "claud": [0, 2, 3], "artifact": 0, "cursor": 0, "com": 0, "replit": 0, "other": [0, 2, 3, 4], "system": [0, 1, 2, 3, 4], "captur": [0, 2], "public": 0, "imagin": 0, "spark": 0, "gold": 0, "rush": 0, "ai": [0, 4], "power": [0, 2, 3, 4], "applic": [0, 3, 4], "howev": [0, 2, 3, 4], "beneath": 0, "surfac": 0, "technolog": 0, "revolut": 0, "li": 0, "complex": [0, 2, 3, 4], "landscap": [0, 2], "practition": 0, "must": [0, 2, 3, 4], "navig": 0, "As": [0, 3], "explor": [0, 2, 4], "engin": [0, 2, 4], "effort": 0, "requir": [0, 2, 3, 4], "manag": [0, 1, 3, 4], "handl": [0, 1, 2, 3, 4], "non": [0, 4], "determinist": [0, 1], "output": [0, 2], "prevent": [0, 4], "hallucin": [0, 4], "overst": 0, "while": [0, 2, 3, 4], "potenti": [0, 2, 3, 4], "remain": [0, 2, 3], "compel": 0, "understand": [0, 1, 2, 3, 4], "hidden": [0, 1], "cost": [0, 2], "reliabl": [0, 1, 2, 4], "enabl": [0, 2, 3, 4], "u": [0, 2, 4], "fulli": [0, 3], "har": [0, 3], "impact": [0, 2, 3], "capabl": [0, 2, 3], "ar": [0, 2, 4], "inde": 0, "remark": 0, "prevail": 0, "narr": 0, "often": [0, 2, 3, 4], "gloss": 0, "over": [0, 2, 3, 4], "fundament": [0, 2], "problem": [0, 1], "organ": [0, 2, 3], "face": [0, 4], "when": [0, 2, 3, 4], "real": [0, 2, 3, 4], "world": [0, 2, 4], "aim": [0, 3, 4], "bridg": 0, "gap": 0, "offer": [0, 2, 4], "clear": [0, 2, 4], "ei": 0, "examin": [0, 3], "pitfal": [0, 1], "work": [0, 2, 3, 4], "throughout": [0, 3, 4], "tackl": 0, "follow": [0, 2, 3, 4], "exhaust": 0, "list": [0, 2, 3, 4], "critic": [0, 2, 3], "behavior": [0, 1, 2], "unlik": [0, 2], "tradit": 0, "softwar": [0, 4], "can": [0, 2, 3, 4], "produc": [0, 2, 4], "differ": [0, 2, 3, 4], "ident": [0, 2], "input": [0, 1, 2, 3, 4], "make": [0, 2, 3, 4], "test": [0, 1, 2], "assur": 0, "particularli": [0, 2, 3, 4], "structur": [0, 2, 3], "un": 0, "struggl": [0, 4], "maintain": [0, 2, 3], "consist": [0, 1, 2, 3, 4], "format": [0, 1, 3, 4], "complic": 0, "integr": [0, 2, 4], "larger": [0, 2, 3], "error": [0, 1, 4], "more": [0, 2, 3, 4], "These": [0, 2, 3], "gener": [0, 1, 2], "plausibl": 0, "sound": 0, "entir": [0, 3], "fabric": 0, "inform": [0, 2, 3, 4], "creat": [0, 2, 3, 4], "signific": [0, 2, 3, 4], "risk": [0, 2, 3], "optim": [0, 1, 3], "The": [0, 3], "financi": [0, 2, 3, 4], "oper": [0, 2, 3], "base": [0, 3, 4], "quickli": [0, 3], "becom": [0, 2], "prohibit": 0, "without": [0, 2, 3, 4], "care": [0, 2], "methodologi": 0, "break": [0, 2, 3], "down": [0, 2, 3], "deal": 0, "new": [0, 2, 3, 4], "take": [0, 3, 4], "hand": [0, 3], "provid": [0, 2, 3], "concret": 0, "exampl": 0, "you": [0, 2, 3, 4], "run": [0, 4], "modifi": 0, "scenario": [0, 2], "solut": [0, 1, 3], "strategi": [0, 1, 2, 3], "best": [0, 1], "techniqu": [0, 1, 2, 3], "pattern": [0, 1, 2, 4], "anti": 0, "look": 0, "limit": [0, 2, 4], "our": [0, 2, 3, 4], "goal": [0, 3], "discourag": 0, "us": [0, 2, 3, 4], "robust": [0, 3], "implement": [0, 1, 2, 3], "By": [0, 3, 4], "upfront": 0, "better": [0, 3], "equip": 0, "leverag": [0, 3, 4], "effect": [0, 1, 2, 3, 4], "avoid": [0, 2, 4], "current": [0, 3], "discours": 0, "around": [0, 3, 4], "tend": 0, "toward": 0, "extrem": 0, "either": [0, 3], "uncrit": 0, "enthusiasm": 0, "wholesal": 0, "dismiss": 0, "focu": [0, 2, 3], "rather": [0, 2], "than": 0, "theoret": 0, "first": [0, 2, 3], "everi": 0, "concept": 0, "illustr": [0, 2, 3], "execut": [0, 2], "immedi": 0, "analysi": [0, 1, 2, 3], "balanc": [0, 2, 3], "both": [0, 2], "help": [0, 2, 3, 4], "reader": 0, "decis": [0, 4], "about": [0, 2, 3, 4], "design": [0, 3, 4], "lead": [0, 2, 3, 4], "initi": [0, 3], "technic": [0, 2, 3], "leader": 0, "architectur": [0, 3], "anyon": 0, "seek": 0, "typic": [0, 2, 3], "job": 0, "role": [0, 2, 3, 4], "platform": [0, 3], "backend": 0, "develop": [0, 2, 3, 4], "exist": 0, "ml": 0, "transit": [0, 2, 3], "overse": 0, "genai": 0, "motiv": 0, "need": [0, 2, 3, 4], "readi": 0, "desir": [0, 4], "overcom": [0, 3], "perform": [0, 1, 2, 3, 4], "ensur": [0, 2, 3, 4], "safeti": [0, 4], "after": [0, 3], "read": [0, 2, 3, 4], "abl": [0, 3, 4], "framework": [0, 2, 4], "deploi": [0, 3], "proper": 0, "safeguard": 0, "realist": 0, "estim": 0, "project": 0, "timelin": 0, "To": [0, 3, 4], "most": [0, 2, 3, 4], "should": [0, 2, 3, 4], "basic": [0, 2, 3], "program": [0, 2], "experi": [0, 2, 3], "access": [0, 4], "knowledg": [0, 2], "openai": [0, 2, 4], "anthrop": [0, 4], "similar": [0, 4], "grade": 0, "befor": 0, "dive": 0, "here": [0, 2, 3, 4], "": [0, 2, 3, 4], "get": [0, 2, 3, 4], "start": 0, "activ": 0, "virtual": 0, "m": 0, "venv": 0, "env": [0, 2, 3, 4], "sourc": [0, 2, 4], "bin": 0, "On": 0, "window": [0, 1], "script": [0, 1], "instal": [0, 4], "packag": 0, "pip": [0, 4], "r": [0, 2, 3, 4], "txt": [0, 2, 3, 4], "file": [0, 2, 3, 4], "root": 0, "directori": 0, "add": [0, 3], "sensit": 0, "openai_api_kei": 0, "your_openai_api_key_her": 0, "never": 0, "share": [0, 4], "commit": 0, "version": [0, 4], "control": [0, 2, 4], "It": [0, 3, 4], "contain": [0, 3], "kept": 0, "privat": 0, "clone": 0, "companion": 0, "git": 0, "http": [0, 2], "souzatharsi": 0, "tamingllm": 0, "cd": 0, "If": [0, 4], "encount": 0, "rate": [0, 2], "consid": [0, 2, 3, 4], "smaller": [0, 3, 4], "retri": [0, 4], "logic": [0, 3], "conflict": 0, "try": [0, 2, 4], "fresh": 0, "like": [0, 2, 3, 4], "poetri": 0, "check": 0, "page": 0, "known": [0, 2, 4], "now": [0, 3, 4], "let": [0, 2, 3], "begin": 0, "practic": [1, 2, 3], "guid": [1, 4], "python": [1, 4], "challeng": [1, 3], "why": 1, "thi": [1, 2, 3, 4], "book": 1, "matter": [1, 2], "overview": 1, "kei": [1, 4], "temperatur": [1, 3], "random": [1, 2], "evalu": [1, 3], "measur": [1, 2], "observ": [1, 2, 4], "log": 1, "monitor": 1, "debug": 1, "respons": [1, 2, 3, 4], "workflow": 1, "common": [1, 3, 4], "failur": 1, "mode": 1, "text": [1, 2, 3, 4], "inconsist": [1, 2, 4], "valid": [1, 2, 4], "recoveri": 1, "enforc": [1, 4], "type": [1, 2, 3, 4], "detect": [1, 4], "ground": [1, 2], "retriev": 1, "augment": [1, 2], "rag": 1, "context": [1, 2, 3, 4], "select": 1, "index": [1, 3], "vector": 1, "store": [1, 3], "chunk": 1, "method": [1, 2, 3, 4], "pipelin": 1, "token": [1, 2, 4], "cach": 1, "invalid": [1, 4], "predict": [1, 2, 4], "issu": [1, 2, 3, 4], "guard": 1, "content": 1, "filter": 1, "sanit": 1, "alert": 1, "constraint": [1, 3], "long": 1, "form": [1, 2, 4], "vendor": [1, 2], "lock": 1, "self": 1, "host": 1, "llama": 1, "llamafil": 1, "setup": 1, "usag": 1, "ollama": 1, "deploy": 1, "consider": 1, "migrat": 1, "complet": [1, 2, 3, 4], "util": [1, 3], "function": [1, 2, 3, 4], "configur": [1, 2], "templat": [1, 2, 3], "recommend": [1, 3], "librari": [1, 3, 4], "commun": 1, "surprisingli": 2, "all": [2, 3, 4], "greg": 2, "brockman": 2, "presid": 2, "One": 2, "i": [2, 3, 4], "natur": [2, 3, 4], "where": [2, 3], "same": [2, 3], "each": [2, 3], "time": [2, 3, 4], "thei": [2, 3, 4], "re": [2, 3], "queri": 2, "even": [2, 3, 4], "prompt": [2, 3], "data": [2, 3, 4], "characterist": 2, "strength": 2, "ask": [2, 4], "ani": [2, 3, 4], "question": [2, 4], "multipl": [2, 3], "ll": 2, "isn": 2, "t": [2, 3, 4], "bug": 2, "featur": [2, 4], "paramet": [2, 3, 4], "allow": [2, 3, 4], "creativ": [2, 4], "divers": [2, 3, 4], "incredibli": 2, "difficult": 2, "testabl": 2, "servic": [2, 3, 4], "compani": [2, 3, 4], "invest": [2, 4], "advic": 2, "mean": [2, 3], "market": [2, 3, 4], "could": [2, 3], "yield": 2, "conclus": 2, "exceedingli": 2, "compar": [2, 3], "regulatori": 2, "complianc": [2, 4], "guarante": [2, 4], "user": [2, 3, 4], "trust": 2, "affect": 2, "primari": 2, "determin": [2, 3, 4], "come": [2, 3, 4], "dure": [2, 4], "calcul": 2, "probabl": 2, "distribut": 2, "next": [2, 4], "set": [2, 3, 4], "nucleu": 2, "coher": [2, 3], "0": [2, 3, 4], "repetit": [2, 3], "1": [2, 4], "increas": [2, 3, 4], "incoher": 2, "dotenv": [2, 3, 4], "import": [2, 3, 4], "load_dotenv": [2, 3, 4], "o": [2, 3, 4], "load": [2, 3, 4], "environ": [2, 3, 4], "variabl": [2, 3, 4], "panda": 2, "pd": 2, "def": [2, 3, 4], "generate_respons": 2, "model_nam": [2, 3], "str": [2, 3, 4], "float": [2, 3], "attempt": [2, 3], "int": [2, 3], "3": [2, 3, 4], "datafram": 2, "demonstr": [2, 3, 4], "client": [2, 4], "result": [2, 3, 4], "temp": 2, "rang": [2, 3, 4], "chat": [2, 3, 4], "messag": [2, 4], "max_token": 2, "50": 2, "append": [2, 3], "choic": 2, "displai": 2, "group": [2, 3], "df_result": 2, "print": [2, 3, 4], "f": [2, 3, 4], "ntemperatur": 2, "40": 2, "temp_respons": 2, "_": 2, "row": 2, "iterrow": 2, "return": [2, 3, 4], "max_length": [2, 4], "10000": [2, 3, 4], "we": [2, 3, 4], "length": [2, 4], "open": [2, 3, 4], "appl": [2, 3, 4], "sec_fil": [2, 4], "gpt": [2, 3, 4], "5": [2, 3, 4], "turbo": [2, 3, 4], "write": [2, 3], "singl": [2, 3, 4], "summari": 2, "2": [2, 3, 4], "inc": [2, 3, 4], "its": [2, 3, 4], "10": [2, 3, 4], "k": [2, 3, 4], "fiscal": [2, 3], "end": [2, 3], "septemb": [2, 3], "28": [2, 3], "2024": [2, 3, 4], "sec": [2, 3, 4], "detail": [2, 3, 4], "busi": 2, "well": [2, 4], "season": 2, "issuer": 2, "california": [2, 4], "manufactur": 2, "smartphon": 2, "person": [2, 4], "tablet": 2, "wearabl": [2, 4], "accessori": 2, "innov": [2, 3], "report": [2, 3, 4], "condit": 2, "secur": [2, 3], "exchang": [2, 3], "commiss": [2, 3], "outlin": 2, "factor": [2, 3], "futur": 2, "invdestacksmeticsisdict": 2, "setispect": 2, "20cyan": 2, "evaluationseld": 2, "anvis": 2, "droitent": 2, "discernminerv": 2, "versbobprefvers": 2, "vo\u8be5": 2, "option\u548c": 2, "meio": 2, "forecast": 2, "\u0432\u0440\u0435\u043ccisco": 2, "dellaischenpoihscap": 2, "geme": 2, "gettim": 2, "comprehens": [2, 3], "simpl": [2, 3], "reveal": 2, "dramat": 2, "alter": 2, "wai": [2, 3, 4], "systemat": 2, "At": 2, "too": [2, 3], "rigid": 2, "vari": 2, "less": 2, "wildli": 2, "approach": [2, 3, 4], "inadequ": 2, "implic": 2, "profound": 2, "one": [2, 3, 4], "an": [2, 3, 4], "radic": 2, "reli": [2, 4], "grappl": 2, "probabilist": 2, "lower": 2, "seem": [2, 4], "safer": 2, "don": [2, 3, 4], "elimin": 2, "underli": [2, 4], "uncertainti": 2, "mere": 2, "mask": 2, "highlight": [2, 3, 4], "paradigm": 2, "aspect": [2, 3, 4], "beyond": 2, "present": [2, 3, 4], "anoth": 2, "fascin": 2, "abil": [2, 4], "spontan": 2, "aris": 2, "scale": [2, 4], "up": [2, 3, 4], "size": [2, 4], "answer": [2, 3, 4], "reason": [2, 3], "aren": 2, "explicitli": 2, "grow": 2, "train": 2, "code": [2, 4], "against": 2, "specif": [2, 3], "wtb": 2, "22": 2, "fig": [2, 3], "4": [2, 3], "relationship": 2, "between": [2, 3], "linear": 2, "below": [2, 3], "certain": [2, 3], "threshold": 2, "absent": 2, "simpli": [2, 3, 4], "task": [2, 3, 4], "much": 2, "coax": 2, "them": [2, 3, 4], "out": [2, 3], "onc": [2, 3], "reach": [2, 3, 4], "point": [2, 3], "journei": 2, "suddenli": 2, "manifest": 2, "what": [2, 4], "research": [2, 3], "call": [2, 3, 4], "phase": 2, "shift": 2, "inabl": 2, "unpredict": [2, 4], "stand": 2, "stark": 2, "contrast": 2, "deliber": 2, "convent": 2, "stabl": 2, "suit": 2, "defin": [2, 3, 4], "accept": 2, "criteria": 2, "contend": 2, "constantli": 2, "7b": 2, "70b": 2, "ha": [2, 4], "dynam": [2, 3], "rethink": 2, "custom": [2, 4], "support": [2, 4], "chatbot": 2, "would": [2, 3], "refund": 2, "request": [2, 3, 4], "track": 2, "verifi": 2, "But": 2, "just": [2, 3, 4], "predefin": [2, 4], "convers": [2, 3, 4], "appropri": [2, 3, 4], "emot": 2, "rais": [2, 3], "weren": 2, "evolv": [2, 3], "accuraci": 2, "subject": 2, "qualiti": [2, 3], "kind": 2, "account": 2, "uniqu": 2, "across": 2, "sever": [2, 3, 4], "dimens": 2, "necessirali": [2, 4], "pre": 2, "extend": 2, "explicit": [2, 4], "usual": 2, "precis": 2, "involv": 2, "resist": 2, "straightforward": [2, 3], "quantif": 2, "numer": 2, "score": 2, "judgment": 2, "inher": [2, 3, 4], "human": [2, 3, 4], "depend": 2, "contamin": 2, "carefulli": 2, "craft": [2, 4], "case": [2, 3], "expect": [2, 3, 4], "e": [2, 3, 4], "g": [2, 3, 4], "unit": [2, 3], "massiv": 2, "internet": 2, "alreadi": 2, "seen": 2, "memor": 2, "artifici": 2, "inflat": 2, "curat": 2, "truli": 2, "unseen": 2, "rigor": 2, "cross": 2, "benchmark": 2, "evolut": 2, "continu": [2, 3, 4], "advanc": [2, 3], "longitudin": 2, "comparison": 2, "obsolet": 2, "older": 2, "autom": [2, 4], "demand": 2, "oversight": 2, "bias": 2, "through": [2, 3], "annot": 2, "review": 2, "process": [2, 3, 4], "mostli": 2, "distinct": 2, "versu": 2, "latter": 2, "foundat": [2, 3], "purpos": [2, 4], "former": 2, "tailor": 2, "particular": [2, 4], "combin": [2, 3], "associ": [2, 3], "solv": [2, 4], "That": [2, 4], "differenti": 2, "becaus": 2, "chang": 2, "scope": [2, 3], "includ": [2, 3, 4], "thing": 2, "meet": 2, "close": 2, "ti": 2, "align": [2, 3], "object": [2, 4], "A": [2, 3], "great": [2, 4], "doesn": [2, 3], "three": 2, "app": 2, "imag": 2, "audio": 2, "etc": [2, 4], "outcom": 2, "truth": 2, "option": [2, 3, 4], "standard": 2, "repres": [2, 4], "palm": 2, "individu": [2, 3], "target": [2, 4], "appli": [2, 3], "note": [2, 3], "further": [2, 3], "see": [2, 4], "avail": [2, 3, 4], "addition": 2, "shown": 2, "fix": [2, 3], "default": [2, 4], "quantifi": 2, "easi": [2, 3], "two": [2, 3, 4], "addit": [2, 3], "quantit": 2, "among": 2, "per": [2, 3], "aggreg": 2, "heavili": 2, "plan": 2, "pertain": 2, "previous": [2, 3], "discuss": [2, 4], "doe": [2, 3, 4], "cover": [2, 3], "edg": 2, "good": [2, 4], "bia": 2, "separ": [2, 3], "synthet": 2, "updat": [2, 3], "reflect": 2, "post": 2, "launch": 2, "fair": 2, "timeout": 2, "variat": 2, "maxim": 2, "valu": [2, 3, 4], "success": 2, "inter": 2, "rater": 2, "scalabl": [2, 3], "weight": 2, "rel": 2, "priorit": 2, "normal": 2, "absolut": [2, 4], "fail": 2, "confid": [2, 4], "interv": 2, "veri": 2, "tier": 2, "hollist": 2, "built": [2, 4], "mind": 2, "x": 2, "fast": 2, "promot": 2, "rapid": 2, "experiment": [2, 4], "iter": [2, 3], "final": [2, 3], "keep": [2, 3], "itself": 2, "confirm": 2, "vi": 2, "jason": 2, "wei": 2, "yi": 2, "tai": 2, "rishi": 2, "bommasani": 2, "colin": 2, "raffel": 2, "barret": 2, "zoph": 2, "sebastian": 2, "borgeaud": 2, "dani": 2, "yogatama": 2, "maarten": 2, "bosma": 2, "denni": 2, "zhou": 2, "donald": 2, "metzler": 2, "ed": 2, "h": 2, "chi": 2, "tatsunori": 2, "hashimoto": 2, "oriol": 2, "vinyal": 2, "perci": 2, "liang": 2, "jeff": 2, "dean": 2, "william": 2, "fedu": 2, "2022": 2, "url": 2, "arxiv": 2, "org": 2, "ab": 2, "2206": 2, "07682": 2, "onli": [3, 4], "those": [3, 4], "who": 3, "go": [3, 4], "far": 3, "possibli": 3, "find": 3, "eliot": 3, "short": 3, "charact": 3, "word": [3, 4], "english": 3, "rule": 3, "thumb": 3, "\u00be": 3, "max_output_token": 3, "modern": 3, "maximum": 3, "tabl": 3, "show": [3, 4], "4096": 3, "16384": 3, "contrari": 3, "might": [3, 4], "summar": 3, "surpass": 3, "instead": [3, 4], "stop": 3, "mid": 3, "sentenc": [3, 4], "truncat": 3, "max_input_token": 3, "input_cost_per_token": 3, "output_cost_per_token": 3, "meta": 3, "llama3": 3, "11b": 3, "instruct": [3, 4], "v1": 3, "128000": 3, "5e": 3, "7": 3, "sonnet": 3, "20241022": 3, "8192": 3, "200000": 3, "3e": 3, "6": [3, 4], "0613": 3, "6e": 3, "04": 3, "09": 3, "1e": 3, "4o": [3, 4], "mini": [3, 4], "gemini": 3, "flash": 3, "002": 3, "1048576": 3, "8": 3, "pro": 3, "2097152": 3, "05e": 3, "pose": [3, 4], "incomplet": 3, "extens": [3, 4], "articl": 3, "abruptli": 3, "cut": 3, "off": 3, "due": 3, "disrupt": 3, "flow": 3, "shallow": 3, "thorough": 3, "receiv": 3, "partial": 3, "dissatisfact": 3, "frustrat": 3, "especi": 3, "true": [3, 4], "educ": 3, "tool": 3, "creation": 3, "address": [3, 4], "feasibl": 3, "effici": [3, 4], "section": [3, 4], "split": 3, "focus": [3, 4], "previou": 3, "For": [3, 4], "analyz": 3, "10k": 3, "schemat": 3, "represent": 3, "diagram": 3, "charactertextsplitt": 3, "tiktoken": 3, "sequenti": 3, "chain": 3, "newlin": 3, "There": 3, "situat": 3, "broadli": [3, 4], "decid": 3, "number": [3, 4], "whether": 3, "overlap": 3, "want": 3, "some": [3, 4], "sure": 3, "semant": 3, "lost": 3, "path": 3, "mani": [3, 4], "computation": 3, "cheap": 3, "sinc": 3, "speciali": 3, "awar": 3, "advantag": [3, 4], "sophist": 3, "embed": 3, "level": 3, "naiv": 3, "period": 3, "nltk": 3, "spaci": 3, "recurs": 3, "divid": 3, "hierarch": 3, "manner": [3, 4], "class": [3, 4], "extract": [3, 4], "your": [3, 4], "made": 3, "talk": 3, "theme": 3, "topic": 3, "langchain": 3, "count": 3, "get_chunk": 3, "chunk_siz": 3, "chunk_overlap": 3, "specifi": [3, 4], "arg": 3, "langchain_text_splitt": 3, "text_splitt": 3, "from_tiktoken_encod": 3, "split_text": 3, "serv": [3, 4], "persona": 3, "assum": 3, "background": 3, "action": 3, "input_text": 3, "actual": 3, "langchain_cor": [3, 4], "prompttempl": 3, "get_base_prompt_templ": 3, "base_prompt": 3, "from_templ": 3, "llmchain": 3, "construct": 3, "togeth": 3, "parser": [3, 4], "output_pars": 3, "stroutputpars": 3, "langchain_commun": 3, "chat_model": 3, "chatlitellm": 3, "get_llm_chain": 3, "prompt_templ": [3, 4], "instanc": 3, "name": [3, 4], "llm_chain": [3, 4], "api_key_label": 3, "upper": 3, "_api_kei": 3, "api_kei": 3, "get_dynamic_prompt_templ": 3, "dict": 3, "get_dynamic_prompt_param": 3, "prompt_param": 3, "part_idx": 3, "total_part": 3, "chat_context": 3, "origin": 3, "part": 3, "total": [3, 4], "param": 3, "dynamic_prompt_param": 3, "copi": 3, "save": 3, "introduct": 3, "yet": 3, "elif": 3, "last": [3, 4], "second": 3, "main": 3, "given": [3, 4], "els": 3, "merg": 3, "concaten": 3, "generate_report": 3, "input_cont": 3, "llm_model_nam": 3, "report_part": 3, "num_part": 3, "len": 3, "dinam": 3, "priovid": 3, "enumer": 3, "invok": [3, 4], "cummul": 3, "n": 3, "join": 3, "sampl": [3, 4], "max_chunk_s": 3, "max_chunk_overlap": 3, "latest": 3, "analyst": 3, "readabl": 3, "move": 3, "insight": [3, 4], "local": [3, 4], "apple_report": 3, "w": 3, "300": 3, "posit": [3, 4], "disclos": 3, "state": 3, "identifi": 3, "luation": 3, "term": 3, "oblig": 3, "cash": 3, "disciplin": 3, "deeper": 3, "granular": 3, "assess": 3, "few": [3, 4], "interest": [3, 4], "high": 3, "smooth": 3, "upon": 3, "head": 3, "subhead": 3, "clariti": 3, "document": [3, 4], "adher": [3, 4], "variou": 3, "revenu": [3, 4], "segment": [3, 4], "profit": [3, 4], "liquid": 3, "capit": [3, 4], "resourc": 3, "inclus": 3, "despit": [3, 4], "depth": 3, "wide": [3, 4], "expert": [3, 4], "nuanc": 3, "overlook": 3, "mitig": 3, "fit": 3, "within": [3, 4], "altern": 3, "meaning": [3, 4], "preserv": 3, "easier": [3, 4], "preprocess": 3, "significantli": 3, "enhanc": 3, "own": 3, "introduc": [3, 4], "layer": [3, 4], "necessit": 3, "meticul": 3, "retain": 3, "necessari": 3, "seamlessli": 3, "circumv": 3, "therebi": 3, "contribut": 3, "overal": 3, "escal": 3, "frequenc": 3, "volum": 3, "bottleneck": 3, "latenc": 3, "reduc": 3, "prepar": 3, "friendli": 3, "improv": [3, 4], "mustafa": 3, "suleyman": 3, "infinit": 3, "memori": 3, "convei": 3, "amount": [3, 4], "fewer": 3, "compress": 3, "progress": 3, "essenti": 3, "condens": 3, "adapt": 3, "adjust": 3, "flexibl": [3, 4], "constrain": [3, 4], "collect": 3, "versatil": 3, "also": [3, 4], "drive": 3, "grace": 3, "fallback": 3, "empow": 3, "crucial": [3, 4], "stai": 3, "full": [3, 4], "splitter": 3, "freedom": 4, "thrive": 4, "julia": 4, "b": 4, "cameron": 4, "excel": 4, "easili": 4, "databas": 4, "sometim": 4, "unstructur": 4, "notebook": 4, "overrid": 4, "response_cont": 4, "wow": 4, "lot": 4, "breakdown": 4, "stream": 4, "portfolio": 4, "iphon": 4, "mac": 4, "ipad": 4, "impress": 4, "trend": 4, "notic": 4, "trillion": 4, "march": 4, "29": 4, "huge": 4, "investor": 4, "definit": 4, "figur": 4, "compli": 4, "regul": 4, "ye": 4, "accur": 4, "date": 4, "transpar": 4, "industri": 4, "serious": 4, "is_json": 4, "myjson": 4, "except": 4, "valueerror": 4, "fals": 4, "clearli": 4, "obtain": 4, "deviat": 4, "lack": 4, "correct": 4, "emploi": 4, "schema": 4, "guidanc": 4, "blueprint": 4, "achiev": 4, "pars": 4, "nativ": 4, "regular": 4, "express": 4, "dedic": 4, "json_format": 4, "person1": 4, "alic": 4, "q1": 4, "20": 4, "person2": 4, "bob": 4, "net": 4, "margin": 4, "materi": 4, "though": 4, "suffici": 4, "nest": 4, "restrict": 4, "programmat": 4, "via": 4, "unend": 4, "whitespac": 4, "until": 4, "forget": 4, "throw": 4, "string": 4, "appear": 4, "somewher": 4, "response_format": 4, "json_object": 4, "approxim": 4, "628": 4, "553": 4, "000": 4, "held": 4, "affili": 4, "sheer": 4, "mention": 4, "15": 4, "115": 4, "823": 4, "stock": 4, "outstand": 4, "octob": 4, "18": 4, "circul": 4, "plai": 4, "googl": 4, "vertex": 4, "match": 4, "releas": 4, "suppli": 4, "so": 4, "worri": 4, "omit": 4, "enum": 4, "benefit": 4, "No": 4, "incorrectli": 4, "refus": 4, "simpler": 4, "strongli": 4, "entiti": 4, "ii": 4, "place": 4, "doc": 4, "07": 4, "08": 4, "06": 4, "later": 4, "basemodel": 4, "secextract": 4, "mentioned_ent": 4, "mentioned_plac": 4, "extract_from_sec_fil": 4, "sec_filing_text": 4, "beta": 4, "explan": 4, "hint": 4, "send": 4, "attribut": 4, "conform": 4, "prompt_extract": 4, "convert": 4, "sec_extract": 4, "nasdaq": 4, "llc": 4, "washington": 4, "d": 4, "c": 4, "cupertino": 4, "wa": 4, "usabl": 4, "beg": 4, "simplifi": 4, "abstract": 4, "with_structured_output": 4, "directli": 4, "descript": 4, "runnabl": 4, "correspond": 4, "typeddict": 4, "dictionari": 4, "qu": 4, "langchain_openai": 4, "chatopenai": 4, "chatprompttempl": 4, "extract_from_sec_filing_langchain": 4, "structured_llm": 4, "from_messag": 4, "sec_extraction_langchain": 4, "found": 4, "regex": 4, "qwen2": 4, "5b": 4, "lightweight": 4, "alibaba": 4, "cloud": 4, "strong": 4, "small": 4, "being": 4, "enough": 4, "hug": 4, "qwen": 4, "top": 4, "100": 4, "sentiment": 4, "label": 4, "assist": 4, "special": 4, "neg": 4, "back": 4, "pass": 4, "modul": 4, "sec_extraction_outlin": 4, "zsp": 4, "zicorp": 4, "phenomenon": 4, "were": 4, "fine": 4, "tune": 4, "simplic": 4, "v": 4, "greater": 4, "steeper": 4, "curv": 4, "quit": 4, "wrapper": 4, "fomer": 4, "wider": 4, "structuredoutputpars": 4, "overhead": 4, "infer": 4, "done": 4, "know": 4, "exactli": 4, "field": 4, "element": 4, "chanc": 4, "connect": 4, "highli": 4, "encourag": 4, "correctli": 4, "downstream": 4, "unlock": 4, "valuabl": 4}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"introduct": [0, 1], "content": [0, 2, 3, 4], "core": 0, "challeng": [0, 2, 4], "we": 0, "ll": 0, "address": 0, "A": [0, 1, 4], "practic": [0, 4], "approach": 0, "note": 0, "perspect": 0, "who": 0, "thi": 0, "book": 0, "i": 0, "For": 0, "outcom": 0, "prerequisit": 0, "set": 0, "up": 0, "your": 0, "environ": 0, "1": [0, 1, 3], "python": 0, "setup": 0, "2": [0, 1], "api": [0, 4], "kei": [0, 2, 3], "configur": 0, "3": [0, 1], "code": [0, 1], "repositori": 0, "troubleshoot": 0, "common": 0, "issu": 0, "tame": 1, "larg": 1, "languag": 1, "model": [1, 3, 4], "chapter": 1, "non": [1, 2], "determin": 1, "eval": [1, 2], "wrestl": [1, 4], "structur": [1, 4], "output": [1, 3, 4], "4": 1, "hallucin": 1, "The": [1, 2, 4], "realiti": 1, "gap": 1, "5": 1, "cost": [1, 3], "factor": 1, "6": 1, "safeti": 1, "concern": 1, "7": 1, "size": [1, 3], "length": [1, 3], "limit": [1, 3], "8": 1, "break": 1, "free": 1, "from": 1, "cloud": 1, "provid": [1, 4], "appendix": 1, "exampl": [1, 2, 3, 4], "b": 1, "tool": [1, 2, 4], "resourc": 1, "evalu": 2, "llm": 2, "base": 2, "applic": 2, "determinist": 2, "machin": 2, "temperatur": 2, "sampl": 2, "spectrum": 2, "emerg": 2, "properti": 2, "problem": [2, 3, 4], "statement": [2, 3, 4], "tradit": 2, "softwar": 2, "v": 2, "design": 2, "conceptu": 2, "overview": 2, "consider": [2, 3], "compon": 2, "dataset": 2, "metric": 2, "layer": 2, "assess": 2, "leaderboard": 2, "rank": 2, "refer": [2, 3], "what": 3, "ar": 3, "token": 3, "comparison": 3, "across": 3, "chunk": 3, "contextu": 3, "link": 3, "gener": [3, 4], "long": 3, "form": 3, "step": 3, "usag": 3, "discuss": 3, "implic": 3, "futur": 3, "conclus": [3, 4], "solut": 4, "strategi": 4, "techniqu": 4, "One": 4, "shot": 4, "prompt": 4, "specif": 4, "json": 4, "mode": 4, "langchain": 4, "outlin": 4, "simpl": 4, "multipl": 4, "choic": 4, "pydant": 4, "compar": 4, "best": 4}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Introduction": [[0, "introduction"]], "Contents": [[0, "contents"], [2, "contents"], [3, "contents"], [4, "contents"]], "Core Challenges We\u2019ll Address": [[0, "core-challenges-we-ll-address"]], "A Practical Approach": [[0, "a-practical-approach"]], "A Note on Perspective": [[0, "a-note-on-perspective"]], "Who This Book Is For": [[0, "who-this-book-is-for"]], "Outcomes": [[0, "outcomes"]], "Prerequisites": [[0, "prerequisites"]], "Setting Up Your Environment": [[0, "setting-up-your-environment"]], "1. Python Environment Setup": [[0, "python-environment-setup"]], "2. API Keys Configuration": [[0, "api-keys-configuration"]], "3. Code Repository": [[0, "code-repository"]], "Troubleshooting Common Issues": [[0, "troubleshooting-common-issues"]], "Taming Large Language Models": [[1, "taming-large-language-models"]], "Chapter 1: Introduction": [[1, "chapter-1-introduction"]], "Chapter 2: Non-determinism & Evals": [[1, "chapter-2-non-determinism-evals"]], "Chapter 3: Wrestling with Structured Output": [[1, "chapter-3-wrestling-with-structured-output"]], "Chapter 4: Hallucination: The Reality Gap": [[1, "chapter-4-hallucination-the-reality-gap"]], "Chapter 5: The Cost Factor": [[1, "chapter-5-the-cost-factor"]], "Chapter 6: Safety Concerns": [[1, "chapter-6-safety-concerns"]], "Chapter 7: Size and Length Limitations": [[1, "chapter-7-size-and-length-limitations"]], "Chapter 8: Breaking Free from Cloud Providers": [[1, "chapter-8-breaking-free-from-cloud-providers"]], "Appendix A: Code Examples": [[1, "appendix-a-code-examples"]], "Appendix B: Tools and Resources": [[1, "appendix-b-tools-and-resources"]], "Challenges of Evaluating LLM-based Applications": [[2, "challenges-of-evaluating-llm-based-applications"]], "Non-Deterministic Machines": [[2, "non-deterministic-machines"]], "Temperature and Sampling": [[2, "temperature-and-sampling"]], "The Temperature Spectrum": [[2, "the-temperature-spectrum"]], "Emerging Properties": [[2, "emerging-properties"]], "Problem Statement": [[2, "problem-statement"], [3, "problem-statement"], [4, "problem-statement"]], "Evals of Traditional Software vs LLMs": [[2, "evals-table"]], "Evals Design": [[2, "evals-design"]], "Conceptual Overview": [[2, "conceptual-overview"]], "Design Considerations": [[2, "design-considerations"]], "Key Components": [[2, "key-components"]], "Examples: The Dataset": [[2, "examples-the-dataset"]], "Metrics: The Metrics Layer": [[2, "metrics-the-metrics-layer"]], "Evaluator: The Assessment Layer": [[2, "evaluator-the-assessment-layer"]], "Leaderboard: The Ranking Layer": [[2, "leaderboard-the-ranking-layer"]], "Tools": [[2, "tools"]], "References": [[2, "references"], [3, "references"]], "Output Size Limitations": [[3, "output-size-limitations"]], "What are Token Limits?": [[3, "what-are-token-limits"]], "Token Cost and Length Limitation Comparison Across Key Models": [[3, "token-cost-table"]], "Content Chunking with Contextual Linking": [[3, "content-chunking-with-contextual-linking"]], "Generating long-form content": [[3, "generating-long-form-content"]], "Step 1: Chunking the Content": [[3, "step-1-chunking-the-content"]], "Example Usage": [[3, "example-usage"]], "Discussion": [[3, "discussion"]], "Implications": [[3, "implications"]], "Future Considerations": [[3, "future-considerations"]], "Conclusion": [[3, "conclusion"], [4, "conclusion"]], "Wrestling with Structured Output": [[4, "wrestling-with-structured-output"]], "The Structured Output Challenges": [[4, "the-structured-output-challenges"]], "Solutions": [[4, "solutions"]], "Strategies": [[4, "strategies"]], "Techniques and Tools": [[4, "techniques-and-tools"]], "One-Shot Prompts": [[4, "one-shot-prompts"]], "Structured Output with Provider-Specific APIs": [[4, "structured-output-with-provider-specific-apis"]], "JSON Mode": [[4, "json-mode"]], "LangChain": [[4, "langchain"]], "Outlines": [[4, "outlines"]], "A Simple Example: Multiple Choice Generation": [[4, "a-simple-example-multiple-choice-generation"]], "Pydantic model": [[4, "pydantic-model"]], "Comparing Solutions": [[4, "comparing-solutions"]], "Best Practices": [[4, "best-practices"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["markdown/intro", "markdown/toc", "notebooks/evals", "notebooks/output_size_limit", "notebooks/structured_output"], "filenames": ["markdown/intro.md", "markdown/toc.md", "notebooks/evals.ipynb", "notebooks/output_size_limit.ipynb", "notebooks/structured_output.ipynb"], "titles": ["1. Introduction", "Taming Large Language Models", "4. Challenges of Evaluating LLM-based Applications", "2. Output Size Limitations", "3. Wrestling with Structured Output"], "terms": {"am": 0, "alwai": [0, 2, 4], "do": [0, 2, 3, 4], "which": [0, 2, 3, 4], "cannot": [0, 2], "order": [0, 2, 4], "mai": [0, 2, 3, 4], "learn": [0, 2, 4], "how": [0, 2, 3, 4], "pablo": 0, "picasso": 0, "In": [0, 2, 3, 4], "recent": [0, 4], "year": [0, 2, 3, 4], "larg": [0, 2, 3, 4], "languag": [0, 2, 3, 4], "model": [0, 2], "llm": [0, 1, 3, 4], "have": [0, 2, 3, 4], "emerg": 0, "transform": [0, 4], "forc": [0, 2], "technologi": [0, 2, 3, 4], "promis": 0, "revolution": 0, "build": [0, 1, 2, 3], "product": [0, 2, 4], "interact": [0, 3, 4], "comput": [0, 2, 3], "from": [0, 2, 3, 4], "chatgpt": [0, 2], "github": 0, "copilot": 0, "claud": [0, 2, 3], "artifact": 0, "cursor": 0, "com": 0, "replit": 0, "other": [0, 2, 3, 4], "system": [0, 1, 2, 3, 4], "captur": [0, 2], "public": 0, "imagin": 0, "spark": 0, "gold": 0, "rush": 0, "ai": [0, 4], "power": [0, 2, 3, 4], "applic": [0, 3, 4], "howev": [0, 2, 3, 4], "beneath": 0, "surfac": 0, "technolog": 0, "revolut": 0, "li": 0, "complex": [0, 2, 3, 4], "landscap": [0, 2], "practition": 0, "must": [0, 2, 3, 4], "navig": 0, "As": [0, 3], "explor": [0, 2, 4], "engin": [0, 2, 4], "effort": 0, "requir": [0, 2, 3, 4], "manag": [0, 1, 3, 4], "handl": [0, 1, 2, 3, 4], "non": [0, 4], "determinist": [0, 1], "output": [0, 2], "prevent": [0, 4], "hallucin": [0, 4], "overst": 0, "while": [0, 2, 3, 4], "potenti": [0, 2, 3, 4], "remain": [0, 2, 3], "compel": 0, "understand": [0, 1, 2, 3, 4], "hidden": [0, 1], "cost": [0, 2], "reliabl": [0, 1, 2, 4], "enabl": [0, 2, 3, 4], "u": [0, 2, 4], "fulli": [0, 3], "har": [0, 3], "impact": [0, 2, 3], "capabl": [0, 2, 3], "ar": [0, 2, 4], "inde": 0, "remark": 0, "prevail": 0, "narr": 0, "often": [0, 2, 3, 4], "gloss": 0, "over": [0, 2, 3, 4], "fundament": [0, 2], "problem": [0, 1], "organ": [0, 2, 3], "face": [0, 4], "when": [0, 2, 3, 4], "real": [0, 2, 3, 4], "world": [0, 2, 4], "aim": [0, 3, 4], "bridg": 0, "gap": 0, "offer": [0, 2, 4], "clear": [0, 2, 4], "ei": 0, "examin": [0, 3], "pitfal": [0, 1], "work": [0, 2, 3, 4], "throughout": [0, 3, 4], "tackl": 0, "follow": [0, 2, 3, 4], "exhaust": 0, "list": [0, 2, 3, 4], "critic": [0, 2, 3], "behavior": [0, 1, 2], "unlik": [0, 2], "tradit": 0, "softwar": [0, 4], "can": [0, 2, 3, 4], "produc": [0, 2, 4], "differ": [0, 2, 3, 4], "ident": [0, 2], "input": [0, 1, 2, 3, 4], "make": [0, 2, 3, 4], "test": [0, 1, 2], "assur": 0, "particularli": [0, 2, 3, 4], "structur": [0, 2, 3], "un": 0, "struggl": [0, 4], "maintain": [0, 2, 3], "consist": [0, 1, 2, 3, 4], "format": [0, 1, 3, 4], "complic": 0, "integr": [0, 2, 4], "larger": [0, 2, 3], "error": [0, 1, 4], "more": [0, 2, 3, 4], "These": [0, 2, 3], "gener": [0, 1, 2], "plausibl": 0, "sound": 0, "entir": [0, 3], "fabric": 0, "inform": [0, 2, 3, 4], "creat": [0, 2, 3, 4], "signific": [0, 2, 3, 4], "risk": [0, 2, 3], "optim": [0, 1, 3], "The": [0, 3], "financi": [0, 2, 3, 4], "oper": [0, 2, 3], "base": [0, 3, 4], "quickli": [0, 3], "becom": [0, 2], "prohibit": 0, "without": [0, 2, 3, 4], "care": [0, 2], "methodologi": 0, "break": [0, 2, 3], "down": [0, 2, 3], "deal": 0, "new": [0, 2, 3, 4], "take": [0, 3, 4], "hand": [0, 3], "provid": [0, 2, 3], "concret": 0, "exampl": 0, "you": [0, 2, 3, 4], "run": [0, 4], "modifi": 0, "scenario": [0, 2], "solut": [0, 1, 3], "strategi": [0, 1, 2, 3], "best": [0, 1], "techniqu": [0, 1, 2, 3], "pattern": [0, 1, 2, 4], "anti": 0, "look": 0, "limit": [0, 2, 4], "our": [0, 2, 3, 4], "goal": [0, 3], "discourag": 0, "us": [0, 2, 3, 4], "robust": [0, 3], "implement": [0, 1, 2, 3], "By": [0, 3, 4], "upfront": 0, "better": [0, 3], "equip": 0, "leverag": [0, 3, 4], "effect": [0, 1, 2, 3, 4], "avoid": [0, 2, 4], "current": [0, 3], "discours": 0, "around": [0, 3, 4], "tend": 0, "toward": 0, "extrem": 0, "either": [0, 3], "uncrit": 0, "enthusiasm": 0, "wholesal": 0, "dismiss": 0, "focu": [0, 2, 3], "rather": [0, 2], "than": 0, "theoret": 0, "first": [0, 2, 3], "everi": 0, "concept": 0, "illustr": [0, 2, 3], "execut": [0, 2], "immedi": 0, "analysi": [0, 1, 2, 3], "balanc": [0, 2, 3], "both": [0, 2], "help": [0, 2, 3, 4], "reader": 0, "decis": [0, 4], "about": [0, 2, 3, 4], "design": [0, 3, 4], "lead": [0, 2, 3, 4], "initi": [0, 3], "technic": [0, 2, 3], "leader": 0, "architectur": [0, 3], "anyon": 0, "seek": 0, "typic": [0, 2, 3], "job": 0, "role": [0, 2, 3, 4], "platform": [0, 3], "backend": 0, "develop": [0, 2, 3, 4], "exist": 0, "ml": 0, "transit": [0, 2, 3], "overse": 0, "genai": 0, "motiv": 0, "need": [0, 2, 3, 4], "readi": 0, "desir": [0, 4], "overcom": [0, 3], "perform": [0, 1, 2, 3, 4], "ensur": [0, 2, 3, 4], "safeti": [0, 4], "after": [0, 3], "read": [0, 2, 3, 4], "abl": [0, 3, 4], "framework": [0, 2, 4], "deploi": [0, 3], "proper": 0, "safeguard": 0, "realist": 0, "estim": 0, "project": 0, "timelin": 0, "To": [0, 3, 4], "most": [0, 2, 3, 4], "should": [0, 2, 3, 4], "basic": [0, 2, 3], "program": [0, 2], "experi": [0, 2, 3], "access": [0, 4], "knowledg": [0, 2], "openai": [0, 2, 4], "anthrop": [0, 4], "similar": [0, 4], "grade": 0, "befor": 0, "dive": 0, "here": [0, 2, 3, 4], "": [0, 2, 3, 4], "get": [0, 2, 3, 4], "start": 0, "activ": 0, "virtual": 0, "m": 0, "venv": 0, "env": [0, 2, 3, 4], "sourc": [0, 2, 4], "bin": 0, "On": 0, "window": [0, 1], "script": [0, 1], "instal": [0, 4], "packag": 0, "pip": [0, 4], "r": [0, 2, 3, 4], "txt": [0, 2, 3, 4], "file": [0, 2, 3, 4], "root": 0, "directori": 0, "add": [0, 3], "sensit": 0, "openai_api_kei": 0, "your_openai_api_key_her": 0, "never": 0, "share": [0, 4], "commit": 0, "version": [0, 4], "control": [0, 2, 4], "It": [0, 3, 4], "contain": [0, 3], "kept": 0, "privat": 0, "clone": 0, "companion": 0, "git": 0, "http": [0, 2], "souzatharsi": 0, "tamingllm": 0, "cd": 0, "If": [0, 4], "encount": 0, "rate": [0, 2], "consid": [0, 2, 3, 4], "smaller": [0, 3, 4], "retri": [0, 4], "logic": [0, 3], "conflict": 0, "try": [0, 2, 4], "fresh": 0, "like": [0, 2, 3, 4], "poetri": 0, "check": 0, "page": 0, "known": [0, 2, 4], "now": [0, 3, 4], "let": [0, 2, 3], "begin": 0, "practic": [1, 2, 3], "guid": [1, 4], "python": [1, 4], "challeng": [1, 3], "why": 1, "thi": [1, 2, 3, 4], "book": 1, "matter": [1, 2], "overview": 1, "kei": [1, 4], "temperatur": [1, 3], "random": [1, 2], "evalu": [1, 3], "measur": [1, 2], "observ": [1, 2, 4], "log": 1, "monitor": 1, "debug": 1, "respons": [1, 2, 3, 4], "workflow": 1, "common": [1, 3, 4], "failur": 1, "mode": 1, "text": [1, 2, 3, 4], "inconsist": [1, 2, 4], "valid": [1, 2, 4], "recoveri": 1, "enforc": [1, 4], "type": [1, 2, 3, 4], "detect": [1, 4], "ground": [1, 2], "retriev": 1, "augment": [1, 2], "rag": 1, "context": [1, 2, 3, 4], "select": 1, "index": [1, 3], "vector": 1, "store": [1, 3], "chunk": 1, "method": [1, 2, 3, 4], "pipelin": 1, "token": [1, 2, 4], "cach": 1, "invalid": [1, 4], "predict": [1, 2, 4], "issu": [1, 2, 3, 4], "guard": 1, "content": 1, "filter": 1, "sanit": 1, "alert": 1, "constraint": [1, 3], "long": 1, "form": [1, 2, 4], "vendor": [1, 2], "lock": 1, "self": 1, "host": 1, "llama": 1, "llamafil": 1, "setup": 1, "usag": 1, "ollama": 1, "deploy": 1, "consider": 1, "migrat": 1, "complet": [1, 2, 3, 4], "util": [1, 3], "function": [1, 2, 3, 4], "configur": [1, 2], "templat": [1, 2, 3], "recommend": [1, 3], "librari": [1, 3, 4], "commun": 1, "surprisingli": 2, "all": [2, 3, 4], "greg": 2, "brockman": 2, "presid": 2, "One": 2, "i": [2, 3, 4], "natur": [2, 3, 4], "where": [2, 3], "same": [2, 3], "each": [2, 3], "time": [2, 3, 4], "thei": [2, 3, 4], "re": [2, 3], "queri": 2, "even": [2, 3, 4], "prompt": [2, 3], "data": [2, 3, 4], "characterist": 2, "strength": 2, "ask": [2, 4], "ani": [2, 3, 4], "question": [2, 4], "multipl": [2, 3], "ll": 2, "isn": 2, "t": [2, 3, 4], "bug": 2, "featur": [2, 4], "paramet": [2, 3, 4], "allow": [2, 3, 4], "creativ": [2, 4], "divers": [2, 3, 4], "incredibli": 2, "difficult": 2, "testabl": 2, "servic": [2, 3, 4], "compani": [2, 3, 4], "invest": [2, 4], "advic": 2, "mean": [2, 3], "market": [2, 3, 4], "could": [2, 3], "yield": 2, "conclus": 2, "exceedingli": 2, "compar": [2, 3], "regulatori": 2, "complianc": [2, 4], "guarante": [2, 4], "user": [2, 3, 4], "trust": 2, "affect": 2, "primari": 2, "determin": [2, 3, 4], "come": [2, 3, 4], "dure": [2, 4], "calcul": 2, "probabl": [2, 4], "distribut": [2, 4], "next": [2, 4], "set": [2, 3, 4], "nucleu": 2, "coher": [2, 3], "0": [2, 3, 4], "repetit": [2, 3], "1": [2, 4], "increas": [2, 3, 4], "incoher": 2, "dotenv": [2, 3, 4], "import": [2, 3, 4], "load_dotenv": [2, 3, 4], "o": [2, 3, 4], "load": [2, 3, 4], "environ": [2, 3, 4], "variabl": [2, 3, 4], "panda": 2, "pd": 2, "def": [2, 3, 4], "generate_respons": 2, "model_nam": [2, 3], "str": [2, 3, 4], "float": [2, 3], "attempt": [2, 3], "int": [2, 3], "3": [2, 3, 4], "datafram": 2, "demonstr": [2, 3, 4], "client": [2, 4], "result": [2, 3, 4], "temp": 2, "rang": [2, 3, 4], "chat": [2, 3, 4], "messag": [2, 4], "max_token": 2, "50": 2, "append": [2, 3], "choic": 2, "displai": 2, "group": [2, 3], "df_result": 2, "print": [2, 3, 4], "f": [2, 3, 4], "ntemperatur": 2, "40": 2, "temp_respons": 2, "_": 2, "row": 2, "iterrow": 2, "return": [2, 3, 4], "max_length": [2, 4], "10000": [2, 3, 4], "we": [2, 3, 4], "length": [2, 4], "open": [2, 3, 4], "appl": [2, 3, 4], "sec_fil": [2, 4], "gpt": [2, 3, 4], "5": [2, 3, 4], "turbo": [2, 3, 4], "write": [2, 3], "singl": [2, 3, 4], "summari": 2, "2": [2, 3, 4], "inc": [2, 3, 4], "its": [2, 3, 4], "10": [2, 3, 4], "k": [2, 3, 4], "fiscal": [2, 3], "end": [2, 3], "septemb": [2, 3], "28": [2, 3], "2024": [2, 3, 4], "sec": [2, 3, 4], "detail": [2, 3, 4], "busi": 2, "well": [2, 4], "season": 2, "issuer": 2, "california": [2, 4], "manufactur": 2, "smartphon": 2, "person": [2, 4], "tablet": 2, "wearabl": [2, 4], "accessori": 2, "innov": [2, 3], "report": [2, 3, 4], "condit": 2, "secur": [2, 3], "exchang": [2, 3], "commiss": [2, 3], "outlin": 2, "factor": [2, 3], "futur": 2, "invdestacksmeticsisdict": 2, "setispect": 2, "20cyan": 2, "evaluationseld": 2, "anvis": 2, "droitent": 2, "discernminerv": 2, "versbobprefvers": 2, "vo\u8be5": 2, "option\u548c": 2, "meio": 2, "forecast": 2, "\u0432\u0440\u0435\u043ccisco": 2, "dellaischenpoihscap": 2, "geme": 2, "gettim": 2, "comprehens": [2, 3], "simpl": [2, 3], "reveal": 2, "dramat": 2, "alter": 2, "wai": [2, 3, 4], "systemat": 2, "At": 2, "too": [2, 3], "rigid": 2, "vari": 2, "less": 2, "wildli": 2, "approach": [2, 3, 4], "inadequ": 2, "implic": 2, "profound": 2, "one": [2, 3, 4], "an": [2, 3, 4], "radic": 2, "reli": [2, 4], "grappl": 2, "probabilist": 2, "lower": 2, "seem": [2, 4], "safer": 2, "don": [2, 3, 4], "elimin": 2, "underli": [2, 4], "uncertainti": 2, "mere": 2, "mask": 2, "highlight": [2, 3, 4], "paradigm": 2, "aspect": [2, 3, 4], "beyond": 2, "present": [2, 3, 4], "anoth": 2, "fascin": 2, "abil": [2, 4], "spontan": 2, "aris": 2, "scale": [2, 4], "up": [2, 3, 4], "size": [2, 4], "answer": [2, 3, 4], "reason": [2, 3], "aren": 2, "explicitli": 2, "grow": 2, "train": 2, "code": [2, 4], "against": 2, "specif": [2, 3], "wtb": 2, "22": 2, "fig": [2, 3], "4": [2, 3], "relationship": 2, "between": [2, 3], "linear": 2, "below": [2, 3], "certain": [2, 3, 4], "threshold": 2, "absent": 2, "simpli": [2, 3, 4], "task": [2, 3, 4], "much": 2, "coax": 2, "them": [2, 3, 4], "out": [2, 3], "onc": [2, 3], "reach": [2, 3, 4], "point": [2, 3], "journei": 2, "suddenli": 2, "manifest": 2, "what": [2, 4], "research": [2, 3], "call": [2, 3, 4], "phase": 2, "shift": 2, "inabl": 2, "unpredict": [2, 4], "stand": 2, "stark": 2, "contrast": 2, "deliber": 2, "convent": 2, "stabl": 2, "suit": 2, "defin": [2, 3, 4], "accept": 2, "criteria": 2, "contend": 2, "constantli": 2, "7b": 2, "70b": 2, "ha": [2, 4], "dynam": [2, 3], "rethink": 2, "custom": [2, 4], "support": [2, 4], "chatbot": 2, "would": [2, 3], "refund": 2, "request": [2, 3, 4], "track": 2, "verifi": 2, "But": 2, "just": [2, 3, 4], "predefin": [2, 4], "convers": [2, 3, 4], "appropri": [2, 3, 4], "emot": 2, "rais": [2, 3], "weren": 2, "evolv": [2, 3], "accuraci": 2, "subject": 2, "qualiti": [2, 3], "kind": 2, "account": 2, "uniqu": 2, "across": 2, "sever": [2, 3, 4], "dimens": 2, "necessirali": [2, 4], "pre": 2, "extend": 2, "explicit": [2, 4], "usual": 2, "precis": 2, "involv": 2, "resist": 2, "straightforward": [2, 3], "quantif": 2, "numer": 2, "score": [2, 4], "judgment": 2, "inher": [2, 3, 4], "human": [2, 3, 4], "depend": 2, "contamin": 2, "carefulli": [2, 4], "craft": [2, 4], "case": [2, 3], "expect": [2, 3, 4], "e": [2, 3, 4], "g": [2, 3, 4], "unit": [2, 3], "massiv": 2, "internet": 2, "alreadi": 2, "seen": 2, "memor": 2, "artifici": 2, "inflat": 2, "curat": 2, "truli": 2, "unseen": 2, "rigor": 2, "cross": 2, "benchmark": 2, "evolut": 2, "continu": [2, 3, 4], "advanc": [2, 3], "longitudin": 2, "comparison": 2, "obsolet": 2, "older": 2, "autom": [2, 4], "demand": 2, "oversight": 2, "bias": [2, 4], "through": [2, 3], "annot": 2, "review": 2, "process": [2, 3, 4], "mostli": 2, "distinct": 2, "versu": 2, "latter": 2, "foundat": [2, 3], "purpos": [2, 4], "former": 2, "tailor": 2, "particular": [2, 4], "combin": [2, 3], "associ": [2, 3], "solv": [2, 4], "That": [2, 4], "differenti": 2, "becaus": 2, "chang": 2, "scope": [2, 3], "includ": [2, 3, 4], "thing": 2, "meet": 2, "close": 2, "ti": 2, "align": [2, 3], "object": [2, 4], "A": [2, 3], "great": [2, 4], "doesn": [2, 3], "three": 2, "app": 2, "imag": 2, "audio": 2, "etc": [2, 4], "outcom": 2, "truth": 2, "option": [2, 3, 4], "standard": 2, "repres": [2, 4], "palm": 2, "individu": [2, 3], "target": [2, 4], "appli": [2, 3], "note": [2, 3], "further": [2, 3], "see": [2, 4], "avail": [2, 3, 4], "addition": 2, "shown": 2, "fix": [2, 3], "default": [2, 4], "quantifi": 2, "easi": [2, 3], "two": [2, 3, 4], "addit": [2, 3], "quantit": 2, "among": 2, "per": [2, 3], "aggreg": 2, "heavili": 2, "plan": 2, "pertain": 2, "previous": [2, 3], "discuss": [2, 4], "doe": [2, 3, 4], "cover": [2, 3], "edg": 2, "good": [2, 4], "bia": 2, "separ": [2, 3], "synthet": 2, "updat": [2, 3], "reflect": 2, "post": 2, "launch": 2, "fair": 2, "timeout": 2, "variat": 2, "maxim": 2, "valu": [2, 3, 4], "success": 2, "inter": 2, "rater": 2, "scalabl": [2, 3], "weight": 2, "rel": 2, "priorit": 2, "normal": [2, 4], "absolut": [2, 4], "fail": 2, "confid": [2, 4], "interv": 2, "veri": 2, "tier": 2, "hollist": 2, "built": [2, 4], "mind": 2, "x": 2, "fast": 2, "promot": 2, "rapid": 2, "experiment": [2, 4], "iter": [2, 3], "final": [2, 3, 4], "keep": [2, 3], "itself": 2, "confirm": 2, "vi": 2, "jason": 2, "wei": 2, "yi": 2, "tai": 2, "rishi": 2, "bommasani": 2, "colin": 2, "raffel": 2, "barret": 2, "zoph": 2, "sebastian": 2, "borgeaud": 2, "dani": 2, "yogatama": 2, "maarten": 2, "bosma": 2, "denni": 2, "zhou": 2, "donald": 2, "metzler": 2, "ed": 2, "h": 2, "chi": 2, "tatsunori": 2, "hashimoto": 2, "oriol": 2, "vinyal": 2, "perci": 2, "liang": 2, "jeff": 2, "dean": 2, "william": 2, "fedu": 2, "2022": 2, "url": 2, "arxiv": 2, "org": 2, "ab": 2, "2206": 2, "07682": 2, "onli": [3, 4], "those": [3, 4], "who": 3, "go": [3, 4], "far": 3, "possibli": 3, "find": 3, "eliot": 3, "short": 3, "charact": 3, "word": [3, 4], "english": 3, "rule": 3, "thumb": 3, "\u00be": 3, "max_output_token": 3, "modern": 3, "maximum": 3, "tabl": 3, "show": [3, 4], "4096": 3, "16384": 3, "contrari": 3, "might": [3, 4], "summar": 3, "surpass": 3, "instead": [3, 4], "stop": 3, "mid": 3, "sentenc": [3, 4], "truncat": 3, "max_input_token": 3, "input_cost_per_token": 3, "output_cost_per_token": 3, "meta": 3, "llama3": 3, "11b": 3, "instruct": [3, 4], "v1": 3, "128000": 3, "5e": 3, "7": 3, "sonnet": 3, "20241022": 3, "8192": 3, "200000": 3, "3e": 3, "6": [3, 4], "0613": 3, "6e": 3, "04": 3, "09": 3, "1e": 3, "4o": [3, 4], "mini": [3, 4], "gemini": 3, "flash": 3, "002": 3, "1048576": 3, "8": 3, "pro": 3, "2097152": 3, "05e": 3, "pose": [3, 4], "incomplet": 3, "extens": [3, 4], "articl": 3, "abruptli": 3, "cut": 3, "off": 3, "due": 3, "disrupt": 3, "flow": 3, "shallow": 3, "thorough": 3, "receiv": 3, "partial": 3, "dissatisfact": 3, "frustrat": 3, "especi": 3, "true": [3, 4], "educ": 3, "tool": 3, "creation": 3, "address": [3, 4], "feasibl": 3, "effici": [3, 4], "section": [3, 4], "split": 3, "focus": [3, 4], "previou": 3, "For": [3, 4], "analyz": 3, "10k": 3, "schemat": 3, "represent": 3, "diagram": 3, "charactertextsplitt": 3, "tiktoken": 3, "sequenti": 3, "chain": 3, "newlin": 3, "There": 3, "situat": 3, "broadli": [3, 4], "decid": 3, "number": [3, 4], "whether": 3, "overlap": 3, "want": 3, "some": [3, 4], "sure": 3, "semant": 3, "lost": 3, "path": 3, "mani": [3, 4], "computation": 3, "cheap": 3, "sinc": 3, "speciali": 3, "awar": 3, "advantag": [3, 4], "sophist": 3, "embed": 3, "level": 3, "naiv": 3, "period": 3, "nltk": 3, "spaci": 3, "recurs": 3, "divid": 3, "hierarch": 3, "manner": [3, 4], "class": [3, 4], "extract": [3, 4], "your": [3, 4], "made": 3, "talk": 3, "theme": 3, "topic": 3, "langchain": 3, "count": 3, "get_chunk": 3, "chunk_siz": 3, "chunk_overlap": 3, "specifi": [3, 4], "arg": 3, "langchain_text_splitt": 3, "text_splitt": 3, "from_tiktoken_encod": 3, "split_text": 3, "serv": [3, 4], "persona": 3, "assum": 3, "background": 3, "action": 3, "input_text": 3, "actual": 3, "langchain_cor": [3, 4], "prompttempl": 3, "get_base_prompt_templ": 3, "base_prompt": 3, "from_templ": 3, "llmchain": 3, "construct": 3, "togeth": 3, "parser": [3, 4], "output_pars": 3, "stroutputpars": 3, "langchain_commun": 3, "chat_model": 3, "chatlitellm": 3, "get_llm_chain": 3, "prompt_templ": [3, 4], "instanc": 3, "name": [3, 4], "llm_chain": [3, 4], "api_key_label": 3, "upper": 3, "_api_kei": 3, "api_kei": 3, "get_dynamic_prompt_templ": 3, "dict": 3, "get_dynamic_prompt_param": 3, "prompt_param": 3, "part_idx": 3, "total_part": 3, "chat_context": 3, "origin": 3, "part": 3, "total": [3, 4], "param": 3, "dynamic_prompt_param": 3, "copi": 3, "save": 3, "introduct": 3, "yet": 3, "elif": 3, "last": [3, 4], "second": 3, "main": 3, "given": [3, 4], "els": 3, "merg": 3, "concaten": 3, "generate_report": 3, "input_cont": 3, "llm_model_nam": 3, "report_part": 3, "num_part": 3, "len": 3, "dinam": 3, "priovid": 3, "enumer": 3, "invok": [3, 4], "cummul": 3, "n": 3, "join": 3, "sampl": [3, 4], "max_chunk_s": 3, "max_chunk_overlap": 3, "latest": 3, "analyst": 3, "readabl": 3, "move": 3, "insight": [3, 4], "local": [3, 4], "apple_report": 3, "w": 3, "300": 3, "posit": [3, 4], "disclos": 3, "state": 3, "identifi": 3, "luation": 3, "term": 3, "oblig": 3, "cash": 3, "disciplin": 3, "deeper": 3, "granular": 3, "assess": 3, "few": [3, 4], "interest": [3, 4], "high": 3, "smooth": 3, "upon": 3, "head": 3, "subhead": 3, "clariti": 3, "document": [3, 4], "adher": [3, 4], "variou": 3, "revenu": [3, 4], "segment": [3, 4], "profit": [3, 4], "liquid": 3, "capit": [3, 4], "resourc": 3, "inclus": 3, "despit": [3, 4], "depth": 3, "wide": [3, 4], "expert": [3, 4], "nuanc": 3, "overlook": 3, "mitig": 3, "fit": 3, "within": [3, 4], "altern": 3, "meaning": [3, 4], "preserv": 3, "easier": [3, 4], "preprocess": 3, "significantli": 3, "enhanc": 3, "own": 3, "introduc": [3, 4], "layer": [3, 4], "necessit": 3, "meticul": 3, "retain": 3, "necessari": 3, "seamlessli": 3, "circumv": 3, "therebi": 3, "contribut": 3, "overal": 3, "escal": 3, "frequenc": 3, "volum": 3, "bottleneck": 3, "latenc": 3, "reduc": 3, "prepar": 3, "friendli": 3, "improv": [3, 4], "mustafa": 3, "suleyman": 3, "infinit": 3, "memori": 3, "convei": 3, "amount": [3, 4], "fewer": 3, "compress": 3, "progress": 3, "essenti": 3, "condens": 3, "adapt": 3, "adjust": [3, 4], "flexibl": [3, 4], "constrain": [3, 4], "collect": 3, "versatil": 3, "also": [3, 4], "drive": 3, "grace": 3, "fallback": 3, "empow": 3, "crucial": [3, 4], "stai": 3, "full": [3, 4], "splitter": 3, "freedom": 4, "thrive": 4, "julia": 4, "b": 4, "cameron": 4, "excel": 4, "easili": 4, "databas": 4, "sometim": 4, "unstructur": 4, "notebook": 4, "overrid": 4, "response_cont": 4, "wow": 4, "lot": 4, "breakdown": 4, "stream": 4, "portfolio": 4, "iphon": 4, "mac": 4, "ipad": 4, "impress": 4, "trend": 4, "notic": 4, "trillion": 4, "march": 4, "29": 4, "huge": 4, "investor": 4, "definit": 4, "figur": 4, "compli": 4, "regul": 4, "ye": 4, "accur": 4, "date": 4, "transpar": 4, "industri": 4, "serious": 4, "is_json": 4, "myjson": 4, "except": 4, "valueerror": 4, "fals": 4, "clearli": 4, "obtain": 4, "deviat": 4, "lack": 4, "correct": 4, "emploi": 4, "schema": 4, "guidanc": 4, "blueprint": 4, "achiev": 4, "pars": 4, "nativ": 4, "regular": 4, "express": 4, "dedic": 4, "json_format": 4, "person1": 4, "alic": 4, "q1": 4, "20": 4, "person2": 4, "bob": 4, "net": 4, "margin": 4, "materi": 4, "though": 4, "suffici": 4, "nest": 4, "restrict": 4, "programmat": 4, "via": 4, "unend": 4, "whitespac": 4, "until": 4, "forget": 4, "throw": 4, "string": 4, "appear": 4, "somewher": 4, "response_format": 4, "json_object": 4, "approxim": 4, "628": 4, "553": 4, "000": 4, "held": 4, "affili": 4, "sheer": 4, "mention": 4, "15": 4, "115": 4, "823": 4, "stock": 4, "outstand": 4, "octob": 4, "18": 4, "circul": 4, "plai": 4, "googl": 4, "vertex": 4, "match": 4, "releas": 4, "suppli": 4, "so": 4, "worri": 4, "omit": 4, "enum": 4, "benefit": 4, "No": 4, "incorrectli": 4, "refus": 4, "simpler": 4, "strongli": 4, "entiti": 4, "ii": 4, "place": 4, "doc": 4, "07": 4, "08": 4, "06": 4, "later": 4, "basemodel": 4, "secextract": 4, "mentioned_ent": 4, "mentioned_plac": 4, "extract_from_sec_fil": 4, "sec_filing_text": 4, "beta": 4, "explan": 4, "hint": 4, "send": 4, "attribut": 4, "conform": 4, "prompt_extract": 4, "convert": 4, "sec_extract": 4, "nasdaq": 4, "llc": 4, "washington": 4, "d": 4, "c": 4, "cupertino": 4, "wa": 4, "usabl": 4, "beg": 4, "simplifi": 4, "abstract": 4, "with_structured_output": 4, "directli": 4, "descript": 4, "runnabl": 4, "correspond": 4, "typeddict": 4, "dictionari": 4, "qu": 4, "langchain_openai": 4, "chatopenai": 4, "chatprompttempl": 4, "extract_from_sec_filing_langchain": 4, "structured_llm": 4, "from_messag": 4, "sec_extraction_langchain": 4, "found": 4, "under": 4, "hood": 4, "logit": 4, "raw": 4, "neural": 4, "network": 4, "prefer": 4, "fine": 4, "grain": 4, "regex": 4, "qwen2": 4, "5b": 4, "lightweight": 4, "alibaba": 4, "cloud": 4, "strong": 4, "small": 4, "being": 4, "enough": 4, "hug": 4, "qwen": 4, "top": 4, "100": 4, "sentiment": 4, "label": 4, "assist": 4, "special": 4, "neg": 4, "back": 4, "pass": 4, "modul": 4, "sec_extraction_outlin": 4, "zsp": 4, "zicorp": 4, "phenomenon": 4, "were": 4, "tune": 4, "simplic": 4, "v": 4, "greater": 4, "steeper": 4, "curv": 4, "quit": 4, "wrapper": 4, "fomer": 4, "wider": 4, "structuredoutputpars": 4, "overhead": 4, "infer": 4, "done": 4, "know": 4, "exactli": 4, "field": 4, "element": 4, "chanc": 4, "connect": 4, "highli": 4, "encourag": 4, "correctli": 4, "downstream": 4, "unlock": 4, "valuabl": 4}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"introduct": [0, 1], "content": [0, 2, 3, 4], "core": 0, "challeng": [0, 2, 4], "we": 0, "ll": 0, "address": 0, "A": [0, 1, 4], "practic": [0, 4], "approach": 0, "note": 0, "perspect": 0, "who": 0, "thi": 0, "book": 0, "i": 0, "For": 0, "outcom": 0, "prerequisit": 0, "set": 0, "up": 0, "your": 0, "environ": 0, "1": [0, 1, 3], "python": 0, "setup": 0, "2": [0, 1], "api": [0, 4], "kei": [0, 2, 3], "configur": 0, "3": [0, 1], "code": [0, 1], "repositori": 0, "troubleshoot": 0, "common": 0, "issu": 0, "tame": 1, "larg": 1, "languag": 1, "model": [1, 3, 4], "chapter": 1, "non": [1, 2], "determin": 1, "eval": [1, 2], "wrestl": [1, 4], "structur": [1, 4], "output": [1, 3, 4], "4": 1, "hallucin": 1, "The": [1, 2, 4], "realiti": 1, "gap": 1, "5": 1, "cost": [1, 3], "factor": 1, "6": 1, "safeti": 1, "concern": 1, "7": 1, "size": [1, 3], "length": [1, 3], "limit": [1, 3], "8": 1, "break": 1, "free": 1, "from": 1, "cloud": 1, "provid": [1, 4], "appendix": 1, "exampl": [1, 2, 3, 4], "b": 1, "tool": [1, 2, 4], "resourc": 1, "evalu": 2, "llm": 2, "base": 2, "applic": 2, "determinist": 2, "machin": 2, "temperatur": 2, "sampl": 2, "spectrum": 2, "emerg": 2, "properti": 2, "problem": [2, 3, 4], "statement": [2, 3, 4], "tradit": 2, "softwar": 2, "v": 2, "design": 2, "conceptu": 2, "overview": 2, "consider": [2, 3], "compon": 2, "dataset": 2, "metric": 2, "layer": 2, "assess": 2, "leaderboard": 2, "rank": 2, "refer": [2, 3], "what": 3, "ar": 3, "token": 3, "comparison": 3, "across": 3, "chunk": 3, "contextu": 3, "link": 3, "gener": [3, 4], "long": 3, "form": 3, "step": 3, "usag": 3, "discuss": 3, "implic": 3, "futur": 3, "conclus": [3, 4], "solut": 4, "strategi": 4, "techniqu": 4, "One": 4, "shot": 4, "prompt": 4, "specif": 4, "json": 4, "mode": 4, "langchain": 4, "outlin": 4, "simpl": 4, "multipl": 4, "choic": 4, "pydant": 4, "compar": 4, "best": 4}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"Introduction": [[0, "introduction"]], "Contents": [[0, "contents"], [2, "contents"], [3, "contents"], [4, "contents"]], "Core Challenges We\u2019ll Address": [[0, "core-challenges-we-ll-address"]], "A Practical Approach": [[0, "a-practical-approach"]], "A Note on Perspective": [[0, "a-note-on-perspective"]], "Who This Book Is For": [[0, "who-this-book-is-for"]], "Outcomes": [[0, "outcomes"]], "Prerequisites": [[0, "prerequisites"]], "Setting Up Your Environment": [[0, "setting-up-your-environment"]], "1. Python Environment Setup": [[0, "python-environment-setup"]], "2. API Keys Configuration": [[0, "api-keys-configuration"]], "3. Code Repository": [[0, "code-repository"]], "Troubleshooting Common Issues": [[0, "troubleshooting-common-issues"]], "Taming Large Language Models": [[1, "taming-large-language-models"]], "Chapter 1: Introduction": [[1, "chapter-1-introduction"]], "Chapter 2: Non-determinism & Evals": [[1, "chapter-2-non-determinism-evals"]], "Chapter 3: Wrestling with Structured Output": [[1, "chapter-3-wrestling-with-structured-output"]], "Chapter 4: Hallucination: The Reality Gap": [[1, "chapter-4-hallucination-the-reality-gap"]], "Chapter 5: The Cost Factor": [[1, "chapter-5-the-cost-factor"]], "Chapter 6: Safety Concerns": [[1, "chapter-6-safety-concerns"]], "Chapter 7: Size and Length Limitations": [[1, "chapter-7-size-and-length-limitations"]], "Chapter 8: Breaking Free from Cloud Providers": [[1, "chapter-8-breaking-free-from-cloud-providers"]], "Appendix A: Code Examples": [[1, "appendix-a-code-examples"]], "Appendix B: Tools and Resources": [[1, "appendix-b-tools-and-resources"]], "Challenges of Evaluating LLM-based Applications": [[2, "challenges-of-evaluating-llm-based-applications"]], "Non-Deterministic Machines": [[2, "non-deterministic-machines"]], "Temperature and Sampling": [[2, "temperature-and-sampling"]], "The Temperature Spectrum": [[2, "the-temperature-spectrum"]], "Emerging Properties": [[2, "emerging-properties"]], "Problem Statement": [[2, "problem-statement"], [3, "problem-statement"], [4, "problem-statement"]], "Evals of Traditional Software vs LLMs": [[2, "evals-table"]], "Evals Design": [[2, "evals-design"]], "Conceptual Overview": [[2, "conceptual-overview"]], "Design Considerations": [[2, "design-considerations"]], "Key Components": [[2, "key-components"]], "Examples: The Dataset": [[2, "examples-the-dataset"]], "Metrics: The Metrics Layer": [[2, "metrics-the-metrics-layer"]], "Evaluator: The Assessment Layer": [[2, "evaluator-the-assessment-layer"]], "Leaderboard: The Ranking Layer": [[2, "leaderboard-the-ranking-layer"]], "Tools": [[2, "tools"]], "References": [[2, "references"], [3, "references"]], "Output Size Limitations": [[3, "output-size-limitations"]], "What are Token Limits?": [[3, "what-are-token-limits"]], "Token Cost and Length Limitation Comparison Across Key Models": [[3, "token-cost-table"]], "Content Chunking with Contextual Linking": [[3, "content-chunking-with-contextual-linking"]], "Generating long-form content": [[3, "generating-long-form-content"]], "Step 1: Chunking the Content": [[3, "step-1-chunking-the-content"]], "Example Usage": [[3, "example-usage"]], "Discussion": [[3, "discussion"]], "Implications": [[3, "implications"]], "Future Considerations": [[3, "future-considerations"]], "Conclusion": [[3, "conclusion"], [4, "conclusion"]], "Wrestling with Structured Output": [[4, "wrestling-with-structured-output"]], "The Structured Output Challenges": [[4, "the-structured-output-challenges"]], "Solutions": [[4, "solutions"]], "Strategies": [[4, "strategies"]], "Techniques and Tools": [[4, "techniques-and-tools"]], "One-Shot Prompts": [[4, "one-shot-prompts"]], "Structured Output with Provider-Specific APIs": [[4, "structured-output-with-provider-specific-apis"]], "JSON Mode": [[4, "json-mode"]], "LangChain": [[4, "langchain"]], "Outlines": [[4, "outlines"]], "A Simple Example: Multiple Choice Generation": [[4, "a-simple-example-multiple-choice-generation"]], "Pydantic model": [[4, "pydantic-model"]], "Comparing Solutions": [[4, "comparing-solutions"]], "Best Practices": [[4, "best-practices"]]}, "indexentries": {}}) \ No newline at end of file diff --git a/tamingllms/_build/jupyter_execute/markdown/intro.ipynb b/tamingllms/_build/jupyter_execute/markdown/intro.ipynb index 31d726f..3e5a46f 100644 --- a/tamingllms/_build/jupyter_execute/markdown/intro.ipynb +++ b/tamingllms/_build/jupyter_execute/markdown/intro.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "93311d57", + "id": "10dc718d", "metadata": {}, "source": [ "(intro)=\n", diff --git a/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb b/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb index 8865243..f2cb600 100644 --- a/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb +++ b/tamingllms/_build/jupyter_execute/notebooks/structured_output.ipynb @@ -587,7 +587,7 @@ "source": [ "### Outlines\n", "\n", - "Outlines is a library specifically focused on structured text generation from LLMs. It provides several powerful features:\n", + "Outlines is a library specifically focused on structured text generation from LLMs. Under the hood, Outlines works by adjusting the probability distribution of the model's output logits - the raw scores from the final layer of the neural network that are normally converted into text tokens. By introducing carefully crafted logit biases, Outlines can guide the model to prefer certain tokens over others, effectively constraining its outputs to a predefined set of valid options. This provides fine-grained control over the model's generation process. In that way, Outlines provides several powerful features:\n", "\n", "* **Multiple Choice Generation**: Restrict the LLM output to a predefined set of options.\n", "* **Regex-based structured generation**: Guide the generation process using regular expressions.\n", diff --git a/tamingllms/notebooks/structured_output.ipynb b/tamingllms/notebooks/structured_output.ipynb index 8e4c876..70c8ef2 100644 --- a/tamingllms/notebooks/structured_output.ipynb +++ b/tamingllms/notebooks/structured_output.ipynb @@ -587,7 +587,7 @@ "source": [ "### Outlines\n", "\n", - "Outlines is a library specifically focused on structured text generation from LLMs. It provides several powerful features:\n", + "Outlines is a library specifically focused on structured text generation from LLMs. Under the hood, Outlines works by adjusting the probability distribution of the model's output logits - the raw scores from the final layer of the neural network that are normally converted into text tokens. By introducing carefully crafted logit biases, Outlines can guide the model to prefer certain tokens over others, effectively constraining its outputs to a predefined set of valid options. This provides fine-grained control over the model's generation process. In that way, Outlines provides several powerful features:\n", "\n", "* **Multiple Choice Generation**: Restrict the LLM output to a predefined set of options.\n", "* **Regex-based structured generation**: Guide the generation process using regular expressions.\n",