AI-Providers
from abc import ABC, abstractmethod
from enum import Enum
from datetime import datetime
import time
import threading


class AiProviderStatus(Enum):
    IDLE = "Idle"
    BUSY = "Busy"
    ERROR = "Error"
    # If it's offline, create a separate thread and ask a demo question to check its status
    OFFLINE = "Offline"
    TIMEOUT = "Timeout"
class AiProviderList(Enum):
    OLLAMA = "Ollama"
    ALEXA = "Alexa"
    GITHUB_GPT_5 = "GitHub GPT-5"
    COHERE = "Cohere"
    GEMINI = "Gemini"
class QuestionAndAnswer:
    def __init__(
        self,
        question: str,
        answer: str,
        timestamp: datetime | None = None,  # accepted so from_dict() can restore a saved timestamp
    ):
        self.question = question
        self.answer = answer
        self.timestamp = timestamp if timestamp is not None else datetime.now()

    def to_dict(self) -> dict:
        return {
            "question": self.question,
            "answer": self.answer,
            "timestamp": self.timestamp.isoformat()
        }

    def __repr__(self) -> str:
        return f"QuestionAndAnswer(question='{self.question[:50]}...', answer='{self.answer[:50]}...', timestamp={self.timestamp})"

    def __str__(self) -> str:
        return f"Q: {self.question}\nA: {self.answer}\nTime: {self.timestamp}"

    @classmethod
    def from_dict(cls, data: dict):
        timestamp = datetime.fromisoformat(data["timestamp"]) if "timestamp" in data else None
        return cls(question=data["question"], answer=data["answer"], timestamp=timestamp)
class AIProvider(ABC):
    def __init__(self):
        self._status = AiProviderStatus.IDLE
        # List of responses received from the AI provider
        self._responses = []
        # List of messages in the conversation history
        self._messages: list[dict[str, str]] = []
        self._QandAs: list[QuestionAndAnswer] = []
        self._question_asked_time: float = 0.0
        self._answer_time: float = 0.0
        self._response_time: float = 0.0
        self._last_answer_time: float = 0.0
        # Request params
        self._temperature: float = 0.3
        self._max_tokens: int = 150
        # Threading
        self._thread: threading.Thread | None = None
        self._timeout: int = 10  # Default timeout in seconds
        self._stop_event: threading.Event = threading.Event()
        self._last_result: str = ""
    @property
    def QandAs(self) -> list[QuestionAndAnswer]:
        return self._QandAs

    @QandAs.setter
    def QandAs(self, value: list[QuestionAndAnswer]):
        self._QandAs = value

    def add_QandA(self, question: str, answer: str) -> None:
        q_and_a = QuestionAndAnswer(
            question=question,
            answer=answer
        )
        self._QandAs.append(q_and_a)

    @property
    def thread(self) -> threading.Thread | None:
        return self._thread

    @property
    def timeout(self) -> int:
        return self._timeout

    @timeout.setter
    def timeout(self, value: int) -> None:
        self._timeout = value

    def ask_with_timeout(self, prompt: str) -> str:
        """Ask with timeout - signals and abandons the worker thread if it takes too long"""
        self._stop_event.clear()
        # Create and start thread
        self._thread = threading.Thread(target=self._ask_with_stop_check, args=(prompt,))
        self._thread.daemon = True  # Make it a daemon thread
        self._thread.start()
        self._thread.join(timeout=self._timeout)
        if self._thread.is_alive():
            print(f"⚠️ {self.name} request timed out after {self._timeout} seconds")
            self._stop_event.set()
            self._thread.join(timeout=0.5)
            if self._thread.is_alive():
                print(f"❌ {self.name} with id {self._thread.ident} did not stop")
            self.status = AiProviderStatus.TIMEOUT
            # return f"Request to {self.name} timed out after {self._timeout} seconds"
            return ""
        # Thread completed successfully
        return getattr(self, '_last_result', '')
    def _ask_with_stop_check(self, prompt: str) -> None:
        """Internal method that checks for stop signal during execution"""
        try:
            if self._stop_event.is_set():
                return
            result = self.ask(prompt)
            if not self._stop_event.is_set():
                self._last_result = result
        except Exception as e:
            if not self._stop_event.is_set():
                print(f"Error in thread: {e}")
                self._last_result = f"Error: {e}"

    @abstractmethod
    def ask(self, prompt: str) -> str:
        """Abstract method that must be implemented by subclasses"""
        # Check for stop signal at the beginning
        if hasattr(self, '_stop_event') and self._stop_event.is_set():
            return "Request was cancelled due to timeout"
        message = {"role": "user", "content": prompt}
        self._messages.append(message)
        # Don't return anything - let subclasses handle the actual response
        return ""  # Return empty string instead of None

    @abstractmethod
    def _call_api(self, message: list[dict[str, str]] | str) -> str:
        """Abstract method that must be implemented by subclasses to handle actual API calls"""
        pass
    def _generic_ask(self, prompt: str) -> str:
        """Generic ask implementation that can be used by subclasses"""
        if not prompt or not prompt.strip():
            print(f"{self.name}: Prompt validation failed")
            return "Please provide a valid prompt."
        print("DEBUG: Prompt validation passed, proceeding...")
        try:
            self.status = AiProviderStatus.BUSY
            self.add_message("user", prompt)
            # Check for stop signal before API call
            if hasattr(self, '_stop_event') and self._stop_event.is_set():
                self.status = AiProviderStatus.ERROR
                return "Request was cancelled"
            # Send full conversation history for context
            self.answer = self._call_api(self.messages)
            self.status = AiProviderStatus.IDLE
            self.add_QandA(
                self.messages[-1]['content'],
                self.answer
            )
            return self.answer
        except Exception as e:
            self.status = AiProviderStatus.ERROR
            return f"Error communicating with {self.name}: {e}"

    @property
    @abstractmethod
    def name(self) -> str:
        """Abstract method that must be implemented by subclasses"""
        pass
    @property
    def status(self) -> AiProviderStatus:
        """Property getter for status"""
        return self._status

    @status.setter
    def status(self, value: AiProviderStatus) -> None:
        """Property setter for status"""
        if value == AiProviderStatus.IDLE:
            self._answer_time = time.time()
        elif value == AiProviderStatus.BUSY:
            self._question_asked_time = time.time()
        elif value == AiProviderStatus.ERROR:
            if self._question_asked_time > 0:
                self._answer_time = time.time()
        self._status = value

    @property
    def answer_time(self) -> float:
        """Property getter for answer_time"""
        return self._answer_time

    @property
    def question_asked_time(self) -> float:
        """Property getter for question_asked_time"""
        return self._question_asked_time

    @property
    def messages(self) -> list[dict[str, str]]:
        """Property getter for messages"""
        return self._messages

    @messages.setter
    def messages(self, value: list[dict[str, str]]) -> None:
        """Property setter for messages"""
        # Record when the question was asked, for measuring response time
        self._question_asked_time = time.time()
        self._messages = value

    def add_message(self, role: str, content: str) -> None:
        """Method to add a message to the messages list"""
        self._messages.append({"role": role, "content": content})

    def clear_messages(self) -> None:
        """Method to clear the messages list"""
        self._messages = []
        print("Conversation history cleared")
"""HACK: Refactor this. to smaller methods"""
def show_conversation_history(self) -> None:
"""Display the current conversation history"""
print(f"\n๐ Conversation History ({len(self.messages)} messages):")
for i, msg in enumerate(self.messages):
print(f" {i+1}. {msg['role'].upper()}: {msg['content'][:100]}{'...' if len(msg['content']) > 100 else ''}")
print()
def get_conversation_stats(self) -> dict:
"""Get statistics about the current conversation"""
user_messages = [msg for msg in self.messages if msg['role'] == 'user']
assistant_messages = [msg for msg in self.messages if msg['role'] == 'assistant']
total_chars = sum(len(msg['content']) for msg in self.messages)
return {
'total_messages': len(self.messages),
'user_messages': len(user_messages),
'assistant_messages': len(assistant_messages),
'total_characters': total_chars,
'avg_message_length': total_chars / len(self.messages) if self.messages else 0
}
    def test_timeout(self) -> None:
        """Test the timeout functionality"""
        print(f"🧪 Testing {self.name} timeout (set to {self._timeout} seconds)")
        # Test with a prompt that should complete quickly
        result = self.ask_with_timeout("Hello, how are you?")
        print(f"✅ Quick response: {result[:50]}...")
        # Test with timeout by setting a very short timeout
        original_timeout = self._timeout
        self._timeout = 1  # Very short timeout for testing
        start_time = time.time()
        result = self.ask_with_timeout("Please take your time responding to this question with a long, detailed answer that will definitely exceed the timeout.")
        end_time = time.time()
        print(f"⏱️ Timeout test took: {end_time - start_time:.2f} seconds")
        print(f"📝 Result: {result}")
        # Restore original timeout
        self._timeout = original_timeout

    @property
    def response_time(self) -> float:
        """Calculate and return the response time for the last interaction"""
        if self._answer_time > 0 and self._question_asked_time > 0:
            self._response_time = self._answer_time - self._question_asked_time
            self._last_answer_time = self._response_time
            return self._response_time
        return self._last_answer_time
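The OFFLINE member of AiProviderStatus above carries a note: probe the provider from a separate thread with a demo question. A minimal sketch of that idea, assuming it is added as a method on AIProvider (the name check_availability and the "ping" prompt are placeholders, not part of the class above):

    def check_availability(self) -> AiProviderStatus:
        """Probe the provider with a demo question on a background thread (sketch)."""
        def _probe() -> None:
            try:
                # Any cheap demo prompt works; "ping" is just a placeholder
                if not self._call_api("ping"):
                    self._status = AiProviderStatus.OFFLINE
            except Exception:
                self._status = AiProviderStatus.OFFLINE

        probe = threading.Thread(target=_probe, daemon=True)
        probe.start()
        probe.join(timeout=self._timeout)
        if probe.is_alive():
            # No reply within the timeout: treat the provider as offline.
            # We set _status directly to avoid touching the response-time bookkeeping.
            self._status = AiProviderStatus.OFFLINE
        return self._status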
Methods
Abstract Methods
ask()
@abstractmethod
def ask(self, prompt: str) -> str:
    """Abstract method that must be implemented by subclasses"""
    # Check for stop signal at the beginning
    if hasattr(self, '_stop_event') and self._stop_event.is_set():
        return "Request was cancelled due to timeout"
    message = {"role": "user", "content": prompt}
    self._messages.append(message)
    # Don't return anything - let subclasses handle the actual response
    return ""  # Return empty string instead of None
- This base ask automatically appends the user message to the conversation history. Note that _generic_ask does the same, so a subclass should rely on one or the other, not both.
name()
@property
@abstractmethod
def name(self) -> str:
    """Abstract method that must be implemented by subclasses"""
    pass
- Returns the display name of your implementation.
_call_api()
@abstractmethod
def _call_api(self, message: list[dict[str, str]] | str) -> str:
    """Abstract method that must be implemented by subclasses to handle actual API calls"""
    pass
- Every AI provider might have a different API call mechanism, so you have to implement this yourself; the generic ask method then calls it for you.
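To make the contract concrete, here is a minimal sketch of a subclass that satisfies all three abstract members. The EchoProvider name and its canned reply are hypothetical, purely for illustration; a real subclass would call an API inside _call_api:

class EchoProvider(AIProvider):
    """Toy provider that echoes the last user message instead of calling a real API."""

    @property
    def name(self) -> str:
        return "Echo"

    def _call_api(self, message: list[dict[str, str]] | str) -> str:
        # A real provider would POST `message` to its API endpoint here
        if isinstance(message, list):
            return f"You said: {message[-1]['content']}"
        return f"You said: {message}"

    def ask(self, prompt: str) -> str:
        # _generic_ask handles history, status transitions, and error handling
        return self._generic_ask(prompt)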
Why?
See the example below, sent by @abhayagovind:
import os
import requests

token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("GITHUB_TOKEN not found.")

url = "https://models.github.ai/inference/chat/completions"
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
    "Accept": "application/json",
    "X-GitHub-Api-Version": "2023-11-28"
}
messages = [
    {"role": "system", "content": "You are a helpful AI voice/text assistant"}
]

print("🤖 AI Assistant is ready! Type 'exit' to quit.\n")
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("👋 Goodbye!")
        break
    messages.append({"role": "user", "content": user_input})
    payload = {
        "model": "meta/Llama-4-Scout-17B-16E-Instruct",
        "messages": messages,
        "max_tokens": 500,
        "temperature": 0.7
    }
    response = requests.post(url, headers=headers, json=payload).json()
    ai_reply = response["choices"][0]["message"]["content"]
    print("AI:", ai_reply, "\n")
    messages.append({"role": "assistant", "content": ai_reply})
Explanation
- She imports required modules.
import os
import requests
- Obtains the GITHUB_TOKEN from the environment (e.g. loaded from a .env file).
token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("GITHUB_TOKEN not found.")
- Sets the url and headers for the API request.
url = "https://models.github.ai/inference/chat/completions"
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
    "Accept": "application/json",
    "X-GitHub-Api-Version": "2023-11-28"
}
- Creates a message list for history.
messages = [
    {"role": "system", "content": "You are a helpful AI voice/text assistant"}
]
- Uses a while loop and continuously asks for user input until "exit" or "quit" is typed.
while True:
    user_input = input("You: ")
    if user_input.lower() in ["exit", "quit"]:
        print("👋 Goodbye!")
        break
    messages.append({"role": "user", "content": user_input})
    payload = {
        "model": "meta/Llama-4-Scout-17B-16E-Instruct",
        "messages": messages,
        "max_tokens": 500,
        "temperature": 0.7
    }
    response = requests.post(url, headers=headers, json=payload).json()
    ai_reply = response["choices"][0]["message"]["content"]
    print("AI:", ai_reply, "\n")
    messages.append({"role": "assistant", "content": ai_reply})
Features:
- History is maintained
Issues
- If the response is invalid or the API server is down, the script will crash (a guarded version of the request is sketched right after this list).
- I would have to run this program directly, like
python llama.py
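A minimal sketch of how the first issue can be guarded; the 30-second timeout and the error string are illustrative choices, not part of the original script:

try:
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    response.raise_for_status()  # turn HTTP 4xx/5xx into an exception we can catch
    ai_reply = response.json()["choices"][0]["message"]["content"]
except (requests.RequestException, KeyError, ValueError) as e:
    # network failure, bad status code, invalid JSON, or unexpected response shape
    ai_reply = f"Error: {e}"

The refactored version below folds a similar check (raise_for_status) into an importable function.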
Minor Modification
Let's look at a refactored version:
import os
import requests

token = os.getenv("GITHUB_TOKEN")
if not token:
    raise ValueError("GITHUB_TOKEN not found.")

url = "https://models.github.ai/inference/chat/completions"
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
    "Accept": "application/json",
    "X-GitHub-Api-Version": "2023-11-28"
}
messages = [
    {"role": "system", "content": "You are a helpful AI voice/text assistant"}
]

def ask(prompt: str, max_tokens: int = 500, temperature: float = 0.7) -> str:
    user_input = prompt
    if user_input.lower() in ["exit", "quit"]:
        return ""
    messages.append({"role": "user", "content": user_input})
    payload = {
        "model": "meta/Llama-4-Scout-17B-16E-Instruct",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    # raise_for_status() must be called on the Response object, before .json()
    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    ai_reply = response.json()["choices"][0]["message"]["content"]
    print("AI:", ai_reply, "\n")
    if len(messages) >= 10:
        messages.pop(1)  # drop the oldest non-system message to cap history size
    messages.append({"role": "assistant", "content": ai_reply})
    return ai_reply
Now the user can import this function:
from llama import ask

answer = ask("What is the capital of France?")
print(f"🤖 Llama > {answer}")
Now it is simple to use in other projects, and you can customize max_tokens and temperature:
from llama import ask
answer = ask("What is the capital of France?", max_tokens=1000, temperature=0.5)
print(answer)
Using the AIProvider Class
import os
import requests
from typing import Optional

# Custom Imports
try:
    from .ai_providers import AIProvider, AiProviderList, AiProviderStatus
except ImportError:
    from ai_providers import AIProvider, AiProviderList, AiProviderStatus

GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")


class Llama(AIProvider):
    def __init__(self, token: Optional[str] = GITHUB_TOKEN):
        super().__init__()
        if not token:
            raise ValueError("GITHUB_TOKEN not found.")
        self.token = token
        self.model = "meta/Llama-4-Scout-17B-16E-Instruct"
        self.url = "https://models.github.ai/inference/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
            "X-GitHub-Api-Version": "2023-11-28"
        }
        self.add_message("system", "You are a helpful AI voice/text assistant")

    @property
    def name(self) -> str:
        return "GitHub Llama"

    def _call_api(self, message: list[dict[str, str]] | str) -> str:
        """Implementation of the abstract _call_api method for GitHub Llama"""
        try:
            payload = {
                "model": self.model,
                "messages": message if isinstance(message, list) else [{"role": "user", "content": message}],
                "max_tokens": self._max_tokens,
                "temperature": self._temperature
            }
            response = requests.post(self.url, headers=self.headers, json=payload)
            response.raise_for_status()
            data = response.json()
            assistant_response = data["choices"][0]["message"]["content"]
            if isinstance(message, list):
                self.add_message("assistant", assistant_response)
            return assistant_response
        except requests.RequestException as e:
            self.status = AiProviderStatus.ERROR
            return f"Error communicating with GitHub Llama API: {e}"
        except KeyError as e:
            self.status = AiProviderStatus.ERROR
            return f"Unexpected response format from GitHub Llama API: {e}"

    def ask(self, prompt: str) -> str:
        """Use the generic ask implementation from the base class.

        Note: _generic_ask already appends the user message, so calling
        super().ask(prompt) here would record it twice.
        """
        return self._generic_ask(prompt)

    def ask_llama_api(self, message: list[dict[str, str]] | str) -> str:
        """Legacy method - now delegates to _call_api"""
        return self._call_api(message)
Testing
if __name__ == "__main__":
    print("🤖 Testing GitHub Llama with Memory/Context")
    print("=" * 50)
    questions = [
        "Hello! My name is Arun CS",
        "What's my name?",
    ]
    llama = Llama()
    for i, q in enumerate(questions, 1):
        print(f"\n🔸 Arun > {q}")
        answer = llama.ask(q)
        print(f"🤖 Llama > {answer}")
        print(f"⏳ Response Time {i}: {llama.response_time:.2f} seconds")
    print("\n" + "=" * 50)
    print("📜 FULL CONVERSATION HISTORY:")
    llama.show_conversation_history()
    print("\n📊 CONVERSATION STATISTICS:")
    stats = llama.get_conversation_stats()
    for key, value in stats.items():
        print(f"  {key.replace('_', ' ').title()}: {value}")
The example below is the equivalent test driver for an Ollama provider implemented with AIProvider; a sketch of the Ollama class itself comes first.
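The Ollama class is not shown in the original, so here is a minimal sketch, assuming a locally running Ollama server on its default /api/chat endpoint; the model name llama3.2 is a placeholder:

import requests

# Custom Imports (same pattern as the Llama module above)
try:
    from .ai_providers import AIProvider, AiProviderStatus
except ImportError:
    from ai_providers import AIProvider, AiProviderStatus


class Ollama(AIProvider):
    def __init__(self, model: str = "llama3.2"):  # model name is a placeholder
        super().__init__()
        self.model = model
        # Default endpoint of a locally running Ollama server
        self.url = "http://localhost:11434/api/chat"
        self.add_message("system", "You are a helpful AI voice/text assistant")

    @property
    def name(self) -> str:
        return "Ollama"

    def _call_api(self, message: list[dict[str, str]] | str) -> str:
        try:
            payload = {
                "model": self.model,
                "messages": message if isinstance(message, list) else [{"role": "user", "content": message}],
                "stream": False,  # request a single JSON response instead of a stream
            }
            response = requests.post(self.url, json=payload)
            response.raise_for_status()
            assistant_response = response.json()["message"]["content"]
            if isinstance(message, list):
                self.add_message("assistant", assistant_response)
            return assistant_response
        except (requests.RequestException, KeyError) as e:
            self.status = AiProviderStatus.ERROR
            return f"Error communicating with Ollama: {e}"

    def ask(self, prompt: str) -> str:
        return self._generic_ask(prompt)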
if __name__ == "__main__":
    ollama = Ollama()
    # name is an instance property, so read it from the instance;
    # Ollama.name would give the property object itself, not the string
    print(f"🤖 Testing {ollama.name} with Memory/Context")
    print("=" * 50)
    questions = [
        "Hello! My name is Arun CS",
        "What's my name?",
    ]
    for i, q in enumerate(questions, 1):
        print(f"\n🔸 Arun > {q}")
        answer = ollama.ask(q)
        print(f"🤖 Ollama > {answer}")
        print(f"⏳ Response Time {i}: {ollama.response_time:.2f} seconds")
    print("\n" + "=" * 50)
    print("📜 FULL CONVERSATION HISTORY:")
    ollama.show_conversation_history()
    print("\n📊 CONVERSATION STATISTICS:")
    stats = ollama.get_conversation_stats()
    for key, value in stats.items():
        print(f"  {key.replace('_', ' ').title()}: {value}")
Do you see any difference?
Implementation: Ollama
ollama = Ollama()
Implementation: Llama
llama = Llama()
So, to use a new implementation, the minimal code is:
from ai_providers.llama import Llama

question = "Hello! My name is Arun CS"
llama = Llama()
answer = llama.ask(question)
print(f"🤖 Llama > {answer}")
And if you compare it with the refactored version:
![[#^5452c3]]
You can see the refactored one has less code, but since it is a plain function, adding more and more AI providers forces you to do this:
from ai_providers.llama import ask as ask_llama
from ai_providers.ollama import ask as ask_ollama
from ai_providers.cohere import ask as ask_cohere

question = "Hello! My name is Arun CS"

answer = ask_llama(question)
print(f"🤖 Llama > {answer}")

answer = ask_ollama(question)
print(f"🤖 Ollama > {answer}")

answer = ask_cohere(question)
print(f"🤖 Cohere > {answer}")
And if it was done using the class-based approach:
from ai_providers.llama import Llama
from ai_providers.ollama import Ollama
from ai_providers.cohere import Cohere

question = "Hello! My name is Arun CS"

llama = Llama()
ollama = Ollama()
cohere = Cohere()

answer = llama.ask(question)
print(f"🤖 Llama > {answer}")

answer = ollama.ask(question)
print(f"🤖 Ollama > {answer}")

answer = cohere.ask(question)
print(f"🤖 Cohere > {answer}")
Both look much the same, but the class-based approach is more extensible and maintainable. For example, if you want to ask a randomly chosen AI provider, you can do this:
import random

from ai_providers.llama import Llama
from ai_providers.ollama import Ollama
from ai_providers.cohere import Cohere

ai_providers = [Llama(), Ollama(), Cohere()]
question = "Hello! My name is Arun CS"

selected_provider = random.choice(ai_providers)
answer = selected_provider.ask(question)
print(f"🤖 {selected_provider.name} > {answer}")
In this way you can easily add more AI providers without changing the structure of the calling code.
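Taking the same idea one step further, the shared base class also lets you build a fallback chain: try each provider with a timeout and return the first usable answer. A minimal sketch, assuming the same three providers; the 5-second timeout and the helper name ask_with_fallback are illustrative choices:

from ai_providers.llama import Llama
from ai_providers.ollama import Ollama
from ai_providers.cohere import Cohere

def ask_with_fallback(question: str) -> str:
    """Try each provider in order; return the first non-empty answer."""
    for provider in [Llama(), Ollama(), Cohere()]:
        provider.timeout = 5  # give each provider at most 5 seconds
        answer = provider.ask_with_timeout(question)
        if answer:  # ask_with_timeout returns "" on timeout
            print(f"🤖 {provider.name} > {answer}")
            return answer
    return "All providers failed."

print(ask_with_fallback("Hello! My name is Arun CS"))

Because every provider shares the same interface, the fallback logic never needs to know which concrete class it is talking to.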