By integrating the OpenAI SDK with Literal AI’s instrumentation, you can also effectively monitor message-based inference servers such as LM Studio, vLLM, or Hugging Face, ensuring that you have full visibility into the performance and usage of your AI models.
# Instrument the OpenAI SDK so Literal AI traces every chat completion call.
from literalai import LiteralClient

from openai import OpenAI

lc = LiteralClient()
lc.instrument_openai()  # patches the OpenAI client so calls are logged to Literal AI

# Example: reuse your existing OpenAI setup.
# Point to the local server (LM Studio's default OpenAI-compatible endpoint).
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

# BUG FIX: the original called `literalai_client.chat.completions.create`,
# but no such name was defined — the instrumented client is `client`.
completion = client.chat.completions.create(
    model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_S.gguf",
    messages=[
        {"role": "system", "content": "Always answer in rhymes."},
        {"role": "user", "content": "Introduce yourself."},
    ],
    temperature=0.7,
)
print(completion.choices[0].message)