AI-Providers

Methods

Abstract Methods

ask()

@abstractmethod
def ask(self, prompt: str) -> str:
    """Record the prompt in the conversation history; subclasses override this to produce the actual response."""
    # Check for a stop signal before doing any work
    if hasattr(self, '_stop_event') and self._stop_event.is_set():
        return "Request was cancelled due to timeout"

    message = {"role": "user", "content": prompt}
    self._messages.append(message)
    # The base implementation returns an empty string; subclasses return the real response
    return ""

name()

@property
@abstractmethod
def name(self) -> str:
    """Abstract method that must be implemented by subclasses"""
    pass

_call_api()

@abstractmethod
def _call_api(self, message: list[dict[str, str]] | str) -> str:
    """Abstract method that must be implemented by subclasses to handle actual API calls"""
    pass
Why?

See the example below, sent by @abhayagovind.

import os
import requests

token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("GITHUB_TOKEN not found.")

url = "https://models.github.ai/inference/chat/completions"
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
    "Accept": "application/json",
    "X-GitHub-Api-Version": "2023-11-28"
}

messages = [
    {"role": "system", "content": "You are a helpful AI voice/text assistant"}
]

print("๐Ÿค– AI Assistant is ready! Type 'exit' to quit.\n")

while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("๐Ÿ‘‹ Goodbye!")
        break


    messages.append({"role": "user", "content": user_input})

    payload = {
        "model": "meta/Llama-4-Scout-17B-16E-Instruct",
        "messages": messages,
        "max_tokens": 500,
        "temperature": 0.7
    }

    response = requests.post(url, headers=headers, json=payload).json()

    ai_reply = response["choices"][0]["message"]["content"]

    print("AI:", ai_reply, "\n")


    messages.append({"role": "assistant", "content": ai_reply})
Explanation
  1. She imports required modules.
import os
import requests

  2. Obtains the GITHUB_TOKEN from the environment (for example exported in the shell or loaded from a .env file) and raises an error if it is missing.
token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("GITHUB_TOKEN not found.")
  3. Sets the URL and headers for the API request.
url = "https://models.github.ai/inference/chat/completions"
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
    "Accept": "application/json",
    "X-GitHub-Api-Version": "2023-11-28"
}
  4. Creates a messages list that holds the conversation history, seeded with a system prompt.
messages = [
    {"role": "system", "content": "You are a helpful AI voice/text assistant"}
]
  5. Uses a while loop that continuously asks for user input until "exit" or "quit" is typed, sends the history to the API, prints the reply, and appends it to the history.
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("๐Ÿ‘‹ Goodbye!")
        break

    messages.append({"role": "user", "content": user_input})

    payload = {
        "model": "meta/Llama-4-Scout-17B-16E-Instruct",
        "messages": messages,
        "max_tokens": 500,
        "temperature": 0.7
    }

    response = requests.post(url, headers=headers, json=payload).json()

    ai_reply = response["choices"][0]["message"]["content"]

    print("AI:", ai_reply, "\n")


    messages.append({"role": "assistant", "content": ai_reply})

Features:

  1. Conversation history is maintained.

Issues:

  1. If the response is invalid or the API server is down, the program will crash; there is no error handling (see the sketch below).
  2. I would have to run this program directly, like:

python llama.py
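
For the first issue, here is a minimal sketch of how the request could be guarded, assuming the url and headers defined above (safe_chat is a hypothetical helper name, not part of the original script): wrap the call in try/except and validate the response before indexing into it.

def safe_chat(payload: dict) -> str | None:
    """Send the request and return the reply, or None if anything goes wrong."""
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()  # raises on HTTP 4xx/5xx errors
        data = response.json()       # raises if the body is not valid JSON
        return data["choices"][0]["message"]["content"]
    except (requests.RequestException, ValueError, KeyError, IndexError) as e:
        print(f"⚠️ Request failed: {e}")
        return None

With this, the chat loop can skip a failed turn or retry instead of crashing.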
Minor Modification

Let's look at a refactored version:

import os
import requests

token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("GITHUB_TOKEN not found.")

url = "https://models.github.ai/inference/chat/completions"
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
    "Accept": "application/json",
    "X-GitHub-Api-Version": "2023-11-28"
}

messages = [
    {"role": "system", "content": "You are a helpful AI voice/text assistant"}
]

def ask(prompt: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
    """Send a prompt to the model and return the reply, keeping conversation history."""
    if prompt.lower() in ["exit", "quit"]:
        return ""
    messages.append({"role": "user", "content": prompt})
    payload = {
        "model": "meta/Llama-4-Scout-17B-16E-Instruct",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()  # raise on HTTP errors before parsing the body
    data = response.json()
    ai_reply = data["choices"][0]["message"]["content"]

    print("AI:", ai_reply, "\n")

    # Trim the oldest non-system message once the history grows too long
    if len(messages) >= 10:
        messages.pop(1)
    messages.append({"role": "assistant", "content": ai_reply})
    return ai_reply

Now the user can import this function

from llama import ask
answer = ask("What is the capital of France?")
print(f"๐Ÿค– Llama > {answer}")

Now it is simple to use in other projects.

You can also customize max_tokens and temperature:

from llama import ask
answer = ask("What is the capital of France?", max_tokens=1000, temperature=0.5)
print(answer)

Using the AIProvider Class

import os
import requests
from typing import Optional

# Custom Imports
try:
    from .ai_providers import AIProvider, AiProviderList, AiProviderStatus
except ImportError:
    from ai_providers import AIProvider, AiProviderList, AiProviderStatus

GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")

class Llama(AIProvider):
    def __init__(self, token: Optional[str] = GITHUB_TOKEN):
        super().__init__()
        if not token:
            raise ValueError("GITHUB_TOKEN not found.")
        self.token = token
        self.model = "meta/Llama-4-Scout-17B-16E-Instruct"
        self.url = "https://models.github.ai/inference/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "X-GitHub-Api-Version": "2023-11-28"
        }
        self.add_message("system", "You are a helpful AI voice/text assistant")

    @property
    def name(self) -> str:
        return "GitHub Llama"

    def _call_api(self, message: list[dict[str, str]] | str) -> str:
        """Implementation of the abstract _call_api method for GitHub Llama"""
        try:
            payload = {
                "model": self.model,
                "messages": message if isinstance(message, list) else [{"role": "user", "content": message}],
                "max_tokens": self._max_tokens,
                "temperature": self._temperature
            }

            response = requests.post(self.url, headers=self.headers, json=payload)
            response.raise_for_status()
            data = response.json()

            assistant_response = data["choices"][0]["message"]["content"]
            
            if isinstance(message, list):
                self.add_message("assistant", assistant_response)

            return assistant_response

        except requests.RequestException as e:
            self.status = AiProviderStatus.ERROR
            return f"Error communicating with GitHub Llama API: {e}"
        except KeyError as e:
            self.status = AiProviderStatus.ERROR
            return f"Unexpected response format from GitHub Llama API: {e}"

    def ask(self, prompt: str) -> str:
        """Use the generic ask implementation from base class"""
        super().ask(prompt)
        return self._generic_ask(prompt)

    def ask_llama_api(self, message: list[dict[str, str]] | str) -> str:
        """Legacy method - now delegates to _call_api"""
        return self._call_api(message)

Testing

if __name__ == "__main__":
    print("๐Ÿค– Testing GitHub Llama with Memory/Context")
    print("=" * 50)

    questions = [
        "Hello! My name is Arun CS",
        "What's my name?",
    ]

    llama = Llama()

    for i, q in enumerate(questions, 1):
        print(f"\n๐Ÿธ Arun > {q}")
        answer = llama.ask(q)
        print(f"๐Ÿค– Llama > {answer}")
        print(f"โณ Response Time {i}: {llama.response_time:.2f} seconds")

    print("\n" + "=" * 50)
    print("๐Ÿ“š FULL CONVERSATION HISTORY:")
    llama.show_conversation_history()

    print("\n๐Ÿ“Š CONVERSATION STATISTICS:")
    stats = llama.get_conversation_stats()
    for key, value in stats.items():
        print(f"  {key.replace('_', ' ').title()}: {value}")

The example below shows test code for an Ollama provider implemented using AIProvider.

if __name__ == "__main__":
    # Instantiate first so that ollama.name resolves to the provider's name string
    ollama = Ollama()

    print(f"🤖 Testing {ollama.name} with Memory/Context")
    print("=" * 50)

    questions = [
        "Hello! My name is Arun CS",
        "What's my name?",
    ]

    for i, q in enumerate(questions, 1):
        print(f"\n๐Ÿธ Arun > {q}")
        answer = ollama.ask(q)
        print(f"๐Ÿค– Ollama > {answer}")
        print(f"โณ Response Time {i}: {ollama.response_time:.2f} seconds")

    print("\n" + "=" * 50)
    print("๐Ÿ“š FULL CONVERSATION HISTORY:")
    ollama.show_conversation_history()

    print("\n๐Ÿ“Š CONVERSATION STATISTICS:")
    stats = ollama.get_conversation_stats()
    for key, value in stats.items():
        print(f"  {key.replace('_', ' ').title()}: {value}")

Do you see any difference?

Implementation: Ollama

ollama = Ollama()

Implementation: Llama

llama = Llama()
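
Because every provider shares the AIProvider interface, code can also be written once against the base class and accept any of them. A minimal sketch (ask_any is a hypothetical helper, not part of the project):

def ask_any(provider: AIProvider, question: str) -> str:
    # Works unchanged for Llama, Ollama, or any future AIProvider subclass
    return provider.ask(question)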

So, to use this new implementation with minimal code:

from ai_providers.llama import Llama
question = "Hello! My name is Arun CS"
llama = Llama()
answer = llama.ask(question)
print(f"๐Ÿค– Llama > {answer}")

And if you compare it with the refactored version:

![[#^5452c3]]

You can see the refactored one has less code, but since it is a function, if you want to add more and more AI providers you would have to do this:

from ai_providers.llama import ask as ask_llama
from ai_providers.ollama import ask as ask_ollama
from ai_providers.cohere import ask as ask_cohere

question = "Hello! My name is Arun CS"
answer = ask_llama(question)
print(f"๐Ÿค– Llama > {answer}")
answer = ask_ollama(question)
print(f"๐Ÿค– Ollama > {answer}")
answer = ask_cohere(question)
print(f"๐Ÿค– Cohere > {answer}")

And if it were done using the class-based approach:

from ai_providers.llama import Llama
from ai_providers.ollama import Ollama
from ai_providers.cohere import Cohere
question = "Hello! My name is Arun CS"
llama = Llama()
ollama = Ollama()
cohere = Cohere()

answer = llama.ask(question)
print(f"๐Ÿค– Llama > {answer}")
answer = ollama.ask(question)
print(f"๐Ÿค– Ollama > {answer}")
answer = cohere.ask(question)
print(f"๐Ÿค– Cohere > {answer}")

Both look the same, but the class-based approach is more extensible and maintainable. For example, if you want to ask a randomly selected AI provider, you can do this:

import random
from ai_providers.llama import Llama
from ai_providers.ollama import Ollama
from ai_providers.cohere import Cohere

ai_providers = [Llama(), Ollama(), Cohere()]
question = "Hello! My name is Arun CS"
selected_provider = random.choice(ai_providers)
answer = selected_provider.ask(question)
print(f"๐Ÿค– {selected_provider.name} > {answer}"  )

In this way you can easily add more AI providers without changing the code structure, as the sketch below shows.
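
For example, asking every configured provider in turn is just a loop over the list; supporting one more provider means appending one more instance, and the loop itself never changes (a sketch reusing the classes from above):

from ai_providers.llama import Llama
from ai_providers.ollama import Ollama
from ai_providers.cohere import Cohere

# Adding a new provider is just one more entry in this list
ai_providers = [Llama(), Ollama(), Cohere()]

question = "Hello! My name is Arun CS"
for provider in ai_providers:
    answer = provider.ask(question)
    print(f"🤖 {provider.name} > {answer}")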