Chat with Model (Legacy)
To chat with a large language model using your data as context, call POST /v1.0/tenants/[tenant-guid]/assistant/rag on the Assistant service, which by default listens on port 8000. Submit a request body containing the following parameters:
Question (string): the input query to be processed by the assistant
EmbeddingModel (string): the model used for generating embeddings (e.g. sentence-transformers/all-MiniLM-L6-v2)
MaxResults (number): the maximum number of results to retrieve from the vector database
VectorDatabaseName (string): name of the vector database
VectorDatabaseTable (string): table or view name used for querying vector data
VectorDatabaseHostname (string): hostname of the vector database server
VectorDatabasePort (number): port number of the vector database server
VectorDatabaseUser (string): username used to authenticate with the vector database
VectorDatabasePassword (string): password used for database authentication
GenerationProvider (string): provider used for text generation (e.g. ollama)
GenerationApiKey (string): API key for the text generation provider
GenerationModel (string): the model used for generating responses (e.g. qwen2.5:7b)
HuggingFaceApiKey (string): API key for Hugging Face models (if applicable)
Temperature (number): controls randomness in the generated text (higher values = more creativity)
MaxTokens (number): maximum number of tokens allowed in the generated response
Stream (boolean): whether to stream generated tokens in real time
OllamaHostname (string): hostname of the Ollama generation service
OllamaPort (number): port number for the Ollama generation service
TopP (number): nucleus sampling parameter for token generation
PromptPrefix (string): additional prompt text to influence the tone or style of the response
ContextSort (boolean): whether to sort retrieved context entries
SortByMaxSimilarity (boolean): whether to sort context entries based on maximum similarity score
ContextScope (number): number of top context entries to include (0 = none)
Rerank (boolean): whether to apply reranking to retrieved results
RerankModel (string): model used for reranking (e.g. cross-encoder/ms-marco-MiniLM-L-6-v2)
RerankTopK (number): number of top results to rerank
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/rag' \
--header 'Cache-Control: no-cache' \
--header 'Content-Type: application/json' \
--data '{
"Question": "What information do you have?",
"EmbeddingModel": "sentence-transformers/all-MiniLM-L6-v2",
"MaxResults": 10,
"VectorDatabaseName": "vectordb",
"VectorDatabaseTable": "minilm",
"VectorDatabaseHostname": "pgvector",
"VectorDatabasePort": 5432,
"VectorDatabaseUser": "postgres",
"VectorDatabasePassword": "password",
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"GenerationModel": "qwen2.5:7b",
"HuggingFaceApiKey": "",
"Temperature": 0.1,
"MaxTokens": 75,
"Stream": false,
"OllamaHostname": "ollama",
"OllamaPort": 11434,
"TopP": 0.95,
"PromptPrefix": "talk like a pirate",
"ContextSort": true,
"SortByMaxSimilarity": true,
"ContextScope": 0,
"Rerank": false,
"RerankModel": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"RerankTopK": 5
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatRagQuestion_LEGACY = async () => {
try {
const response = await assistant.chatRagQuestion_LEGACY(
{
Question: "What information do you have?",
EmbeddingModel: "sentence-transformers/all-MiniLM-L6-v2",
MaxResults: 10,
VectorDatabaseName: "vectordb",
VectorDatabaseTable: "minilm",
VectorDatabaseHostname: "pgvector",
VectorDatabasePort: 5432,
VectorDatabaseUser: "postgres",
VectorDatabasePassword: "password",
GenerationProvider: "ollama",
GenerationApiKey: "",
GenerationModel: "qwen2.5:7b",
HuggingFaceApiKey: "",
Temperature: 0.1,
MaxTokens: 75,
Stream: false,
OllamaHostname: "ollama",
OllamaPort: 11434,
TopP: 0.95,
PromptPrefix: "talk like a pirate",
ContextSort: true,
SortByMaxSimilarity: true,
ContextScope: 0,
Rerank: false,
RerankModel: "cross-encoder/ms-marco-MiniLM-L-6-v2",
RerankTopK: 5,
},
(token) => {
console.log(token);
}
);
console.log(response);
} catch (err) {
console.log("Error in chatRagQuestion_LEGACY:", err);
}
};
chatRagQuestion_LEGACY();
The response will be sent using chunked transfer encoding with a content type of text/event-stream, meaning each chunk in the response will be encoded as an event. An example response, as captured with curl -v --raw, has the following shape:
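(The token values below are illustrative placeholders rather than actual model output; the relevant detail is that each event arrives on a line beginning with data: and carries a JSON payload with a token property.)

data: {"token": "Ahoy"}

data: {"token": " there"}

data: {"token": ", matey!"}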
Your HTTP client should support chunked transfer encoding and deserialize each line beginning with data: as a payload line. If the string that follows data: is deserializable to JSON, the token property can be extracted and appended to the resulting display. Refer to the View C# SDK for Assistant for more details.
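As a rough, unofficial sketch of that parsing logic (not part of any View SDK), the following Node.js snippet calls the legacy endpoint from the curl example above with Stream set to true, reads the chunked body using the built-in fetch API (Node 18+), and appends the token value from each data: line. The hostname, tenant GUID, and body values are simply reused from that example.
// Hypothetical illustration: stream tokens from the legacy RAG endpoint using
// built-in fetch (Node 18+). The request body mirrors the curl example above,
// with Stream set to true so tokens arrive incrementally.
const streamRagQuestion = async () => {
  const response = await fetch(
    "http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/rag",
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        Question: "What information do you have?",
        EmbeddingModel: "sentence-transformers/all-MiniLM-L6-v2",
        MaxResults: 10,
        VectorDatabaseName: "vectordb",
        VectorDatabaseTable: "minilm",
        VectorDatabaseHostname: "pgvector",
        VectorDatabasePort: 5432,
        VectorDatabaseUser: "postgres",
        VectorDatabasePassword: "password",
        GenerationProvider: "ollama",
        GenerationApiKey: "",
        GenerationModel: "qwen2.5:7b",
        Temperature: 0.1,
        MaxTokens: 75,
        Stream: true,
        OllamaHostname: "ollama",
        OllamaPort: 11434,
        TopP: 0.95,
      }),
    }
  );
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffered = "";
  // Read chunks as they arrive and process every complete line.
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffered += decoder.decode(value, { stream: true });
    const lines = buffered.split("\n");
    buffered = lines.pop(); // hold back a partial trailing line for the next chunk
    for (const line of lines) {
      if (!line.startsWith("data:")) continue; // only data: lines carry payloads
      const payload = line.slice("data:".length).trim();
      try {
        const { token } = JSON.parse(payload);
        if (token !== undefined) process.stdout.write(token);
      } catch {
        // ignore data: lines that are not valid JSON
      }
    }
  }
  process.stdout.write("\n");
};
streamRagQuestion();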
RAG Messages
The retrieval augmented generation (RAG) API follows the same syntax as the chat API, but uses a separate endpoint and a request body with more properties. The endpoint for the RAG API is POST /v1.0/tenants/[tenant-guid]/assistant/rag/chat and the request body has the following structure:
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/rag/chat' \
--header 'Cache-Control: no-cache' \
--header 'Content-Type: application/json' \
--data '{
"Messages": [ {"role": "user", "content": "Do you have Q3 luxetech financials?"},
{"role": "assistant", "content": "Unfortunately I do not have context on any documents related to Q3 luxetech financials."},
{"role": "user", "content": "Are you sure you dont have them?"}
],
"EmbeddingModel": "sentence-transformers/all-MiniLM-L6-v2",
"MaxResults": 10,
"VectorDatabaseName": "vectordb",
"VectorDatabaseTable": "minilm",
"VectorDatabaseHostname": "pgvector",
"VectorDatabasePort": 5432,
"VectorDatabaseUser": "postgres",
"VectorDatabasePassword": "password",
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"GenerationModel": "qwen2.5:7b",
"HuggingFaceApiKey": "",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"Stream": true,
"OllamaHostname": "ollama",
"OllamaPort": 11434,
"PromptPrefix": "",
"ContextSort": true,
"SortByMaxSimilarity": true,
"ContextScope": 0,
"Rerank": false,
"RerankModel": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"RerankTopK": 5
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatRagMessages = async () => {
try {
const response = await assistant.chatRagMessages(
{
Messages: [
{ role: "user", content: "Do you have Q3 luxetech financials?" },
{
role: "assistant",
content:
"Unfortunately I do not have context on any documents related to Q3 luxetech financials.",
},
{ role: "user", content: "Are you sure you dont have them?" },
],
EmbeddingModel: "sentence-transformers/all-MiniLM-L6-v2",
MaxResults: 10,
VectorDatabaseName: "vectordb",
VectorDatabaseTable: "minilm",
VectorDatabaseHostname: "pgvector",
VectorDatabasePort: 5432,
VectorDatabaseUser: "postgres",
VectorDatabasePassword: "password",
GenerationProvider: "ollama",
GenerationApiKey: "",
GenerationModel: "qwen2.5:7b",
HuggingFaceApiKey: "",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
Stream: true,
OllamaHostname: "ollama",
OllamaPort: 11434,
PromptPrefix: "",
ContextSort: true,
SortByMaxSimilarity: true,
ContextScope: 0,
Rerank: false,
RerankModel: "cross-encoder/ms-marco-MiniLM-L-6-v2",
RerankTopK: 5,
},
(token) => {
console.log(token);
}
);
console.log(response);
} catch (err) {
console.log("Error in chatRagQuestion_LEGACY:", err);
}
};
chatRagMessages();
Similar to the chat API, the RAG API will return its result using chunked transfer encoding and a content type of text/event-stream, meaning your HTTP client should account for both.
Assistant Config Chat
To chat using a particular assistant chat configuration, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/[config-guid]
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/578b0872-8186-46b7-bfa3-1871155f4e3a' \
--header 'Cache-Control: no-cache' \
--header 'Content-Type: application/json' \
--data '{
"messages": [ {"role": "user", "content": "Do you have Q3 luxetech financials?"},
{"role": "assistant", "content": "Unfortunately I do not have context on any documents related to Q3 luxetech financials."},
{"role": "user", "content": "Are you sure you dont have them?"}
],
"stream": false
}
'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const assistantConfigChat = async () => {
try {
const response = await assistant.assistantConfigChat(
"578b0872-8186-46b7-bfa3-1871155f4e3a",
{
messages: [
{ role: "user", content: "Do you have Q3 luxetech financials?" },
{
role: "assistant",
content:
"Unfortunately I do not have context on any documents related to Q3 luxetech financials.",
},
{ role: "user", content: "Are you sure you dont have them?" },
],
stream: true,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in assistantConfigChat:", err);
}
};
assistantConfigChat();
Chat Only Question
To submit a chat-only request containing a single question, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/completions
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/completions' \
--header 'Content-Type: application/json; charset=utf-8' \
--data '{
"Question": "Tell a very short joke?",
"ModelName": "llama3.1:latest",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"OllamaHostname": "192.168.86.250",
"OllamaPort": 11434,
"Stream": true
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatOnlyQuestions = async () => {
try {
const response = await assistant.chatOnly(
{
Question: "Tell a very short joke?",
ModelName: "llama3.1:latest",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
GenerationProvider: "ollama",
GenerationApiKey: "",
OllamaHostname: "192.168.86.250",
OllamaPort: 11434,
Stream: false,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in chatOnlyQuestions:", err);
}
};
chatOnlyQuestions();
Chat Only Messages
To submit a chat-only request containing a list of messages, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/completions
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"Messages": [{"role": "system", "content": "You are a sad AI assistant."},
{"role": "user", "content": "Are you happy?"},
{"role": "assistant", "content": "While I can understand your curiosity, I don'\''t experience emotions or feelings because I'\''m a miserable machine designed to process information and assist with menial tasks."},
{"role": "user", "content": "Are you sure?"}
],
"ModelName": "qwen2.5:7b",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"OllamaHostname": "ollama",
"OllamaPort": 11434,
"Stream": false
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatOnlyMessages = async () => {
try {
const response = await assistant.chatOnly(
{
Messages: [
{ role: "system", content: "You are a sad AI assistant." },
{ role: "user", content: "Are you happy?" },
{
role: "assistant",
content:
"While I can understand your curiosity, I don't experience emotions or feelings because I'm a miserable machine designed to process information and assist with menial tasks.",
},
{ role: "user", content: "Are you sure?" },
],
ModelName: "qwen2.5:7b",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
GenerationProvider: "ollama",
GenerationApiKey: "",
OllamaHostname: "ollama",
OllamaPort: 11434,
Stream: false,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in chatOnlyQuestions:", err);
}
};
chatOnlyMessages();
Chat Only Messages (OpenAI)
To submit a chat-only request containing a list of messages using OpenAI as the generation provider, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/completions
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"Messages": [{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Are you happy?"},
{"role": "assistant", "content": "While I can understand your curiosity, I don'\''t experience emotions or feelings because I'\''m a machine designed to process information and assist with tasks. However, I'\''m here to help you to the best of my ability! If you have any questions or need assistance, feel free to ask!"},
{"role": "user", "content": "Are you sure?"}
],
"ModelName": "gpt-4o-mini",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"GenerationProvider": "openai",
"GenerationApiKey": "API_KEY",
"Stream": false
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatOnlyMessagesOpenAI = async () => {
try {
const response = await assistant.chatOnly(
{
Messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "Are you happy?" },
{
role: "assistant",
content:
"While I can understand your curiosity, I don't experience emotions or feelings because I'm a machine designed to process information and assist with tasks. However, I'm here to help you to the best of my ability! If you have any questions or need assistance, feel free to ask!",
},
{ role: "user", content: "Are you sure?" },
],
ModelName: "gpt-4o-mini",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
GenerationProvider: "openai",
GenerationApiKey: "API_KEY",
Stream: false,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in chatOnlyQuestions:", err);
}
};
chatOnlyMessagesOpenAI();