Chat with Model (Legacy)
To chat with a large language model using your data as context, call POST /v1.0/tenants/[tenant-guid]/assistant/rag on the Assistant service, which by default listens on port 8000. Submit a request body containing the following parameters:
Question (string): the input query to be processed by the assistant
EmbeddingModel (string): the model used for generating embeddings (e.g. sentence-transformers/all-MiniLM-L6-v2)
MaxResults (number): the maximum number of results to retrieve from the vector database
VectorDatabaseName (string): name of the vector database
VectorDatabaseTable (string): table or view name used for querying vector data
VectorDatabaseHostname (string): hostname of the vector database server
VectorDatabasePort (number): port number of the vector database server
VectorDatabaseUser (string): username used to authenticate with the vector database
VectorDatabasePassword (string): password used for database authentication
GenerationProvider (string): provider used for text generation (e.g. ollama)
GenerationApiKey (string): API key for the text generation provider
GenerationModel (string): the model used for generating responses (e.g. qwen2.5:7b)
HuggingFaceApiKey (string): API key for Hugging Face models (if applicable)
Temperature (number): controls randomness in the generated text (higher values = more creativity)
MaxTokens (number): maximum number of tokens allowed in the generated response
Stream (boolean): whether to stream generated tokens in real time
OllamaHostname (string): hostname of the Ollama generation service
OllamaPort (number): port number for the Ollama generation service
TopP (number): nucleus sampling parameter for token generation
PromptPrefix (string): additional prompt text to influence the tone or style of the response
ContextSort (boolean): whether to sort retrieved context entries
SortByMaxSimilarity (boolean): whether to sort context entries based on maximum similarity score
ContextScope (number): number of top context entries to include (0 = none)
Rerank (boolean): whether to apply reranking to retrieved results
RerankModel (string): model used for reranking (e.g. cross-encoder/ms-marco-MiniLM-L-6-v2)
RerankTopK (number): number of top results to rerank
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/rag' \
--header 'Cache-Control: no-cache' \
--header 'Content-Type: application/json' \
--data '{
"Question": "What information do you have?",
"EmbeddingModel": "sentence-transformers/all-MiniLM-L6-v2",
"MaxResults": 10,
"VectorDatabaseName": "vectordb",
"VectorDatabaseTable": "minilm",
"VectorDatabaseHostname": "pgvector",
"VectorDatabasePort": 5432,
"VectorDatabaseUser": "postgres",
"VectorDatabasePassword": "password",
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"GenerationModel": "qwen2.5:7b",
"HuggingFaceApiKey": "",
"Temperature": 0.1,
"MaxTokens": 75,
"Stream": false,
"OllamaHostname": "ollama",
"OllamaPort": 11434,
"TopP": 0.95,
"PromptPrefix": "talk like a pirate",
"ContextSort": true,
"SortByMaxSimilarity": true,
"ContextScope": 0,
"Rerank": false,
"RerankModel": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"RerankTopK": 5
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatRagQuestion_LEGACY = async () => {
try {
const response = await assistant.chatRagQuestion_LEGACY(
{
Question: "What information do you have?",
EmbeddingModel: "sentence-transformers/all-MiniLM-L6-v2",
MaxResults: 10,
VectorDatabaseName: "vectordb",
VectorDatabaseTable: "minilm",
VectorDatabaseHostname: "pgvector",
VectorDatabasePort: 5432,
VectorDatabaseUser: "postgres",
VectorDatabasePassword: "password",
GenerationProvider: "ollama",
GenerationApiKey: "",
GenerationModel: "qwen2.5:7b",
HuggingFaceApiKey: "",
Temperature: 0.1,
MaxTokens: 75,
Stream: false,
OllamaHostname: "ollama",
OllamaPort: 11434,
TopP: 0.95,
PromptPrefix: "talk like a pirate",
ContextSort: true,
SortByMaxSimilarity: true,
ContextScope: 0,
Rerank: false,
RerankModel: "cross-encoder/ms-marco-MiniLM-L-6-v2",
RerankTopK: 5,
},
(token) => {
console.log(token);
}
);
console.log(response);
} catch (err) {
console.log("Error in chatRagQuestion_LEGACY:", err);
}
};
chatRagQuestion_LEGACY();
The response will be sent using chunked transfer encoding with a content type of text/event-stream, meaning each chunk in the response will be encoded as an event. An example response, as captured with curl -v --raw, has the following shape:
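(The token values below are illustrative placeholders rather than actual model output; the relevant detail is that each event arrives on a line beginning with data: and carries a JSON payload with a token property.)

data: {"token": "Ahoy"}

data: {"token": " there"}

data: {"token": ", matey!"}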
Your HTTP client should support chunked transfer encoding and deserialize each line beginning with data: as a payload line. If the string that follows data: is deserializable to JSON, the token property can be extracted and appended to the resulting display. Refer to the View C# SDK for Assistant for more details.
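As a rough, unofficial sketch of that parsing logic (not part of any View SDK), the following Node.js snippet calls the legacy endpoint from the curl example above with Stream set to true, reads the chunked body using the built-in fetch API (Node 18+), and appends the token value from each data: line. The hostname, tenant GUID, and body values are simply reused from that example.
// Hypothetical illustration: stream tokens from the legacy RAG endpoint using
// built-in fetch (Node 18+). The request body mirrors the curl example above,
// with Stream set to true so tokens arrive incrementally.
const streamRagQuestion = async () => {
  const response = await fetch(
    "http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/rag",
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        Question: "What information do you have?",
        EmbeddingModel: "sentence-transformers/all-MiniLM-L6-v2",
        MaxResults: 10,
        VectorDatabaseName: "vectordb",
        VectorDatabaseTable: "minilm",
        VectorDatabaseHostname: "pgvector",
        VectorDatabasePort: 5432,
        VectorDatabaseUser: "postgres",
        VectorDatabasePassword: "password",
        GenerationProvider: "ollama",
        GenerationApiKey: "",
        GenerationModel: "qwen2.5:7b",
        Temperature: 0.1,
        MaxTokens: 75,
        Stream: true,
        OllamaHostname: "ollama",
        OllamaPort: 11434,
        TopP: 0.95,
      }),
    }
  );
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffered = "";
  // Read chunks as they arrive and process every complete line.
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffered += decoder.decode(value, { stream: true });
    const lines = buffered.split("\n");
    buffered = lines.pop(); // hold back a partial trailing line for the next chunk
    for (const line of lines) {
      if (!line.startsWith("data:")) continue; // only data: lines carry payloads
      const payload = line.slice("data:".length).trim();
      try {
        const { token } = JSON.parse(payload);
        if (token !== undefined) process.stdout.write(token);
      } catch {
        // ignore data: lines that are not valid JSON
      }
    }
  }
  process.stdout.write("\n");
};
streamRagQuestion();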
RAG Messages
The retrieval augmented generation (RAG) API follows the same syntax as the chat API, but uses a separate endpoint and a request body with more properties. The endpoint for the RAG API is POST /v1.0/tenants/[tenant-guid]/assistant/rag/chat and the request body has the following structure:
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/rag/chat' \
--header 'Cache-Control: no-cache' \
--header 'Content-Type: application/json' \
--data '{
"Messages": [ {"role": "user", "content": "Do you have Q3 luxetech financials?"},
{"role": "assistant", "content": "Unfortunately I do not have context on any documents related to Q3 luxetech financials."},
{"role": "user", "content": "Are you sure you dont have them?"}
],
"EmbeddingModel": "sentence-transformers/all-MiniLM-L6-v2",
"MaxResults": 10,
"VectorDatabaseName": "vectordb",
"VectorDatabaseTable": "minilm",
"VectorDatabaseHostname": "pgvector",
"VectorDatabasePort": 5432,
"VectorDatabaseUser": "postgres",
"VectorDatabasePassword": "password",
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"GenerationModel": "qwen2.5:7b",
"HuggingFaceApiKey": "",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"Stream": true,
"OllamaHostname": "ollama",
"OllamaPort": 11434,
"PromptPrefix": "",
"ContextSort": true,
"SortByMaxSimilarity": true,
"ContextScope": 0,
"Rerank": false,
"RerankModel": "cross-encoder/ms-marco-MiniLM-L-6-v2",
"RerankTopK": 5
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatRagMessages = async () => {
try {
const response = await assistant.chatRagMessages(
{
Messages: [
{ role: "user", content: "Do you have Q3 luxetech financials?" },
{
role: "assistant",
content:
"Unfortunately I do not have context on any documents related to Q3 luxetech financials.",
},
{ role: "user", content: "Are you sure you dont have them?" },
],
EmbeddingModel: "sentence-transformers/all-MiniLM-L6-v2",
MaxResults: 10,
VectorDatabaseName: "vectordb",
VectorDatabaseTable: "minilm",
VectorDatabaseHostname: "pgvector",
VectorDatabasePort: 5432,
VectorDatabaseUser: "postgres",
VectorDatabasePassword: "password",
GenerationProvider: "ollama",
GenerationApiKey: "",
GenerationModel: "qwen2.5:7b",
HuggingFaceApiKey: "",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
Stream: true,
OllamaHostname: "ollama",
OllamaPort: 11434,
PromptPrefix: "",
ContextSort: true,
SortByMaxSimilarity: true,
ContextScope: 0,
Rerank: false,
RerankModel: "cross-encoder/ms-marco-MiniLM-L-6-v2",
RerankTopK: 5,
},
(token) => {
console.log(token);
}
);
console.log(response);
} catch (err) {
console.log("Error in chatRagQuestion_LEGACY:", err);
}
};
chatRagMessages();
Similar to the chat API, the RAG API will return its result using chunked transfer encoding and a content type of text/event-stream, meaning your HTTP client should account for both.
Assistant Config Chat
To chat using a particular assistant chat configuration, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/[config-guid]
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/578b0872-8186-46b7-bfa3-1871155f4e3a' \
--header 'Cache-Control: no-cache' \
--header 'Content-Type: application/json' \
--data '{
"messages": [ {"role": "user", "content": "Do you have Q3 luxetech financials?"},
{"role": "assistant", "content": "Unfortunately I do not have context on any documents related to Q3 luxetech financials."},
{"role": "user", "content": "Are you sure you dont have them?"}
],
"stream": false
}
'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const assistantConfigChat = async () => {
try {
const response = await assistant.assistantConfigChat(
"578b0872-8186-46b7-bfa3-1871155f4e3a",
{
messages: [
{ role: "user", content: "Do you have Q3 luxetech financials?" },
{
role: "assistant",
content:
"Unfortunately I do not have context on any documents related to Q3 luxetech financials.",
},
{ role: "user", content: "Are you sure you dont have them?" },
],
stream: true,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in assistantConfigChat:", err);
}
};
assistantConfigChat();
Chat Only Question
To submit a chat-only request containing a single question, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/completions
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/completions' \
--header 'Content-Type: application/json; charset=utf-8' \
--data '{
"Question": "Tell a very short joke?",
"ModelName": "llama3.1:latest",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"OllamaHostname": "192.168.86.250",
"OllamaPort": 11434,
"Stream": true
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatOnlyQuestions = async () => {
try {
const response = await assistant.chatOnly(
{
Question: "Tell a very short joke?",
ModelName: "llama3.1:latest",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
GenerationProvider: "ollama",
GenerationApiKey: "",
OllamaHostname: "192.168.86.250",
OllamaPort: 11434,
Stream: false,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in chatOnlyQuestions:", err);
}
};
chatOnlyQuestions();
Chat Only Messages
To submit a chat-only request containing a list of messages, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/completions
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"Messages": [{"role": "system", "content": "You are a sad AI assistant."},
{"role": "user", "content": "Are you happy?"},
{"role": "assistant", "content": "While I can understand your curiosity, I don'\''t experience emotions or feelings because I'\''m a miserable machine designed to process information and assist with menial tasks."},
{"role": "user", "content": "Are you sure?"}
],
"ModelName": "qwen2.5:7b",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"GenerationProvider": "ollama",
"GenerationApiKey": "",
"OllamaHostname": "ollama",
"OllamaPort": 11434,
"Stream": false
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatOnlyMessages = async () => {
try {
const response = await assistant.chatOnly(
{
Messages: [
{ role: "system", content: "You are a sad AI assistant." },
{ role: "user", content: "Are you happy?" },
{
role: "assistant",
content:
"While I can understand your curiosity, I don't experience emotions or feelings because I'm a miserable machine designed to process information and assist with menial tasks.",
},
{ role: "user", content: "Are you sure?" },
],
ModelName: "qwen2.5:7b",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
GenerationProvider: "ollama",
GenerationApiKey: "",
OllamaHostname: "ollama",
OllamaPort: 11434,
Stream: false,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in chatOnlyQuestions:", err);
}
};
chatOnlyMessages();
Chat Only Messages (OpenAI)
To submit a chat-only request containing a list of messages using OpenAI as the generation provider, call POST /v1.0/tenants/[tenant-guid]/assistant/chat/completions
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/assistant/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"Messages": [{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Are you happy?"},
{"role": "assistant", "content": "While I can understand your curiosity, I don'\''t experience emotions or feelings because I'\''m a machine designed to process information and assist with tasks. However, I'\''m here to help you to the best of my ability! If you have any questions or need assistance, feel free to ask!"},
{"role": "user", "content": "Are you sure?"}
],
"ModelName": "gpt-4o-mini",
"Temperature": 0.1,
"TopP": 0.95,
"MaxTokens": 75,
"GenerationProvider": "openai",
"GenerationApiKey": "API_KEY",
"Stream": false
}'
import { ViewAssistantSdk } from "view-sdk";
const assistant = new ViewAssistantSdk(
"00000000-0000-0000-0000-000000000000", //tenant Id
"default", //access token
"http://localhost:8000/" //endpoint
);
const chatOnlyMessagesOpenAI = async () => {
try {
const response = await assistant.chatOnly(
{
Messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "Are you happy?" },
{
role: "assistant",
content:
"While I can understand your curiosity, I don't experience emotions or feelings because I'm a machine designed to process information and assist with tasks. However, I'm here to help you to the best of my ability! If you have any questions or need assistance, feel free to ask!",
},
{ role: "user", content: "Are you sure?" },
],
ModelName: "gpt-4o-mini",
Temperature: 0.1,
TopP: 0.95,
MaxTokens: 75,
GenerationProvider: "openai",
GenerationApiKey: "API_KEY",
Stream: false,
},
(token) => {
console.log(token); //in case of stream, this will be called for each token
}
);
console.log(response); // in case of stream = false, this will be the final response
} catch (err) {
console.log("Error in chatOnlyQuestions:", err);
}
};
chatOnlyMessagesOpenAI();