import os
from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from nltk.tokenize import word_tokenize
from fastapi import Request, Depends

# chromadb.py

# Load environment variables FIRST: EMBEDDINGS_MODEL_NAME must be read only
# after .env has been merged into os.environ, otherwise the constant below
# silently picks up a stale (or missing) value from the process environment.
if not load_dotenv():
    print("Could not load .env file or it is empty. Please check if it exists and is readable.")
    exit(1)

# Name of the HuggingFace embeddings model used for vectorizing documents.
embeddings_model_name = os.environ.get("EMBEDDINGS_MODEL_NAME")


class ChromaDB:
    """Thin placeholder wrapper around a per-tenant ChromaDB connection."""

    def __init__(self, location: str):
        """Remember the on-disk location and open the (stub) connection."""
        self.location = location
        self.connection = self.connect_to_db()

    def connect_to_db(self):
        """Establish the database connection at ``self.location``.

        Currently a stub that returns ``None``; replace with real logic.
        """
        return None

    def query(self, user_input: str):
        """Query ChromaDB via RAG for content relevant to *user_input* (stubbed)."""
        return "Sample business content based on user input."  # Placeholder

    def close(self):
        """Release the underlying connection; nothing to do for the stub."""
        return None


# Per-tenant DB location: each customer (account) and project gets its own store.
def get_chromadb_location(account_id: str, project_id: str) -> str:
    """Build the per-tenant ChromaDB persistence path for one account/project pair."""
    return "./data/" + account_id + "/" + project_id + "/chromadb/"

# Initialize ChromaDB with embeddings
def initialize_chromadb(account_id: str, project_id: str):
    """Create a langchain Chroma vector store persisted at the tenant's location."""
    persist_dir = get_chromadb_location(account_id, project_id)
    model_name = os.environ.get("EMBEDDINGS_MODEL_NAME")
    embedding_fn = HuggingFaceEmbeddings(model_name=model_name)
    return Chroma(embedding_function=embedding_fn, persist_directory=persist_dir)

# Initialize ChromaDB without embeddings
def normal_initialize_hromadb(account_id: str, project_id: str) -> ChromaDB:
    """
    Initialize and return a ChromaDB instance for the given account and project.

    NOTE(review): the function name has a typo ("hromadb") but is kept for
    backward compatibility with existing callers.
    """
    return ChromaDB(get_chromadb_location(account_id, project_id))


def query_business_content_from_vector_db(user_input: str, db_instance: ChromaDB):
    """
    Query the vector database (ChromaDB with RAG) with *user_input* and return
    whatever business content the instance yields.
    """
    return db_instance.query(user_input)

class ChromaDocument:
    """Lightweight record for a single document retrieved from ChromaDB."""

    def __init__(self, name, file_name, content):
        # Display name, originating file path, and raw text content.
        self.name = name
        self.file_name = file_name
        self.content = content

def retrieve_documents_from_chromadb(db, user_input_text):
    """Run a similarity search on *db* and wrap the top hits as ChromaDocument records."""
    hits = db.search(user_input_text, search_type='similarity', k=5)

    documents = []
    for hit in hits:
        text = hit.page_content
        source_path = hit.metadata.get("source", "")
        # No dedicated title field in the metadata, so the file's base name
        # doubles as the document name.
        documents.append(ChromaDocument(os.path.basename(source_path), source_path, text))

    return documents

def _extract_link_with_title(file_name):
    """Turn a scraped-website ``.txt`` path into a ``link:::title`` string.

    The title is taken from a ``Title:`` line inside the file when present,
    otherwise it is derived from the URL-like file name.
    """
    # The scraped page may carry its title on a "Title:" line near the top.
    title_from_content = None
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            if line.startswith("Title:"):
                title_from_content = line.replace("Title:", "").strip()
                break

    # Repair an occasional doubled "www." artifact in scraped file names.
    link = file_name.replace("www.www.", "www.")
    # The basename encodes the URL with "_" in place of "/" plus a ".txt" suffix.
    link = link.split("/")[-1].replace("_", "/").replace(".txt", "")
    if title_from_content:
        title = title_from_content
    else:
        # Fall back to a humanized title built from the last URL segment.
        title = link.split("/")[-1].replace(".html", "").replace("-", " ").title()
    return f'{link}:::{title}'


def process_documents_from_chromadb(retrieved_docs):
    """
    Aggregate retrieved documents' text and categorize them by file name.

    Categories (decided by the document's ``file_name`` suffix):
      * ``*_transcript.txt`` -> YouTube video id
      * other ``*.txt``      -> website link formatted as ``link:::title``
      * anything else        -> downloadable document URL

    Returns a tuple ``(total_content, links, youtube, docs)``.
    """
    total_content = []
    links = []
    youtube = []
    docs = []
    root_url = ''  # Prefix for download URLs; empty yields relative links.

    for doc in retrieved_docs:
        file_name = doc.file_name
        # Collapse all runs of whitespace inside the document body.
        content = " ".join(doc.content.split())
        total_content.append(content)

        print(file_name)
        # Categorize by suffix. endswith() is used instead of substring
        # containment so names like "notes.txt.bak" are not misclassified.
        if file_name.endswith("_transcript.txt"):
            # YouTube transcript: the video id is the basename minus the suffix.
            youtube_id = file_name[: -len("_transcript.txt")]
            youtube.append(youtube_id.rsplit("/", 1)[-1])
        elif file_name.endswith(".txt"):
            links.append(_extract_link_with_title(file_name))
        else:
            # Any other file type becomes a download link.
            docs.append(f"{root_url}/download/{file_name.split('/')[-1]}")

    total_content = " ".join(total_content).replace("\n", " ").strip()

    print(links, youtube, docs)
    return total_content, links, youtube, docs

def truncate_content(content, max_tokens):
    """
    Tokenize *content* with NLTK and keep at most *max_tokens* tokens.

    Returns a tuple of (truncated text joined by spaces, total token count
    before truncation).
    """
    all_tokens = word_tokenize(content)
    kept = all_tokens[:max_tokens]
    return ' '.join(kept), len(all_tokens)
