This page covers configuration and management of View crawl operation objects.

Object Overview

Crawl operations provide metadata about an invocation of a crawl plan.

Endpoint, URL, and Supported Methods

Objects are managed via the crawler server API at [http|https]://[hostname]:[port]/v1.0/tenants/[tenant-guid]/crawloperations

Supported methods include: GET HEAD DELETE

Structure

Objects have the following structure:

{
    "GUID": "9ced1af3-2e19-4cd6-81ce-a8c90a9ed32d",
    "TenantGUID": "default",
    "CrawlPlanGUID": "e9a7d61e-7cbd-46e4-9956-533e22008978",
    "CrawlScheduleGUID": "oneminute",
    "CrawlFilterGUID": "default",
    "DataRepositoryGUID": "1a56c067-9e6d-4f7b-85bf-eb6b04aeda3f",
    "MetadataRuleGUID": "example-metadata-rule",
    "EmbeddingsRuleGUID": "crawler-embeddings-rule",
    "ProcessingEndpoint": "http://nginx-orchestrator:8501/processor",
    "CleanupEndpoint": "http://nginx-orchestrator:8501/processor/cleanup",
    "Name": "Alienware CIFS (started 2024-10-25T22:29:31 UTC)",
    "ObjectsEnumerated": 123,
    "BytesEnumerated": 61052490,
    "ObjectsAdded": 0,
    "BytesAdded": 0,
    "ObjectsUpdated": 0,
    "BytesUpdated": 0,
    "ObjectsDeleted": 0,
    "BytesDeleted": 0,
    "ObjectsSuccess": 0,
    "BytesSuccess": 0,
    "ObjectsFailed": 2,
    "BytesFailed": 16384,
    "State": "Success",
    "CreatedUtc": "2024-10-25T22:29:31.000000Z",
    "StartUtc": "2024-10-25T22:29:31.000000Z",
    "StartEnumerationUtc": "2024-10-25T22:29:31.000000Z",
    "StartRetrievalUtc": "2024-10-25T22:29:39.000000Z",
    "FinishEnumerationUtc": "2024-10-25T22:29:39.000000Z",
    "FinishRetrievalUtc": "2024-10-25T22:29:39.000000Z",
    "FinishUtc": "2024-10-25T22:29:39.000000Z"
}

Properties:

  • GUID GUID globally unique identifier for the object
  • TenantGUID GUID globally unique identifier for the tenant
  • CrawlPlanGUID GUID globally unique identifier for the crawl plan
  • CrawlScheduleGUID GUID globally unique identifier for the crawl schedule
  • CrawlFilterGUID GUID globally unique identifier for the crawl filter
  • DataRepositoryGUID GUID globally unique identifier for the data repository
  • MetadataRuleGUID GUID globally unique identifier for the metadata rule
  • EmbeddingsRuleGUID GUID globally unique identifier for the embeddings rule
  • ProcessingEndpoint string URL to use to process new and changed objects
  • CleanupEndpoint string URL to use to process deleted objects
  • Name string the name of the object
  • ObjectsEnumerated int the number of objects enumerated
  • BytesEnumerated int the number of bytes enumerated
  • ObjectsAdded int the number of objects added since the latest enumeration
  • BytesAdded int the number of bytes added since the latest enumeration
  • ObjectsUpdated int the number of objects updated since the latest enumeration
  • BytesUpdated int the number of bytes updated since the latest enumeration
  • ObjectsDeleted int the number of objects deleted from the latest enumeration
  • BytesDeleted int the number of bytes deleted from the latest enumeration
  • ObjectsSuccess int the number of objects succeeded
  • BytesSuccess int the number of bytes succeeded
  • ObjectsFailed int the number of objects failed
  • BytesFailed int the number of bytes failed
  • State enum the state of the crawl operation, values include NotStarted Starting Stopped Canceled Enumerating Retrieving Deleting Success Failed
  • CreatedUtc datetime timestamp from creation, in UTC time
  • StartUtc datetime timestamp from start, in UTC time
  • StartEnumerationUtc datetime timestamp from beginning of enumeration, in UTC time
  • StartRetrievalUtc datetime timestamp from beginning of retrieval, in UTC time
  • FinishEnumerationUtc datetime timestamp from completion of enumeration, in UTC time
  • FinishRetrievalUtc datetime timestamp from completion of retrieval, in UTC time
  • FinishUtc datetime timestamp from completion, in UTC time

Enumerate

Refer to the Enumeration page in REST API for details about the use of enumeration APIs.

Enumerate objects by using GET /v2.0/tenants/[tenant-guid]/crawloperations. The resultant object will appear as:

{
    "Success": true,
    "Timestamp": {
        "Start": "2024-10-21T02:36:37.677751Z",
        "TotalMs": 23.58,
        "Messages": {}
    },
    "MaxResults": 10,
    "IterationsRequired": 1,
    "EndOfResults": true,
    "RecordsRemaining": 16,
    "Objects": [
        {
            "GUID": "example-crawloperation",
            ... crawloperation details ...
        },
        { ... }
    ],
    "ContinuationToken": "[continuation-token]"
}
curl --location 'http://view.homedns.org:8000/v2.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/' \
--header 'Authorization: ••••••'
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.enumerateCrawlOperations() and logs the response.
 * Any error raised inside the try block is caught and logged instead.
 */
async function enumerateCrawlOperations() {
  try {
    const result = await crawler.enumerateCrawlOperations();
    console.log(result, "Crawl operations fetched successfully");
  } catch (error) {
    console.log("Error fetching Crawl operations:", error);
  }
}

enumerateCrawlOperations();

Read

To read an object by GUID, call GET /v1.0/tenants/[tenant-guid]/crawloperations/[crawloperation-guid]. If the object exists, it will be returned as a JSON object in the response body. If it does not exist, a 404 will be returned with a NotFound error response.

{
    "GUID": "9ced1af3-2e19-4cd6-81ce-a8c90a9ed32d",
    "TenantGUID": "default",
    "CrawlPlanGUID": "e9a7d61e-7cbd-46e4-9956-533e22008978",
    "CrawlScheduleGUID": "oneminute",
    "CrawlFilterGUID": "default",
    "DataRepositoryGUID": "1a56c067-9e6d-4f7b-85bf-eb6b04aeda3f",
    "MetadataRuleGUID": "example-metadata-rule",
    "EmbeddingsRuleGUID": "crawler-embeddings-rule",
    "ProcessingEndpoint": "http://nginx-orchestrator:8501/processor",
    "CleanupEndpoint": "http://nginx-orchestrator:8501/processor/cleanup",
    "Name": "Alienware CIFS (started 2024-10-25T22:29:31 UTC)",
    "ObjectsEnumerated": 123,
    "BytesEnumerated": 61052490,
    "ObjectsAdded": 0,
    "BytesAdded": 0,
    "ObjectsUpdated": 0,
    "BytesUpdated": 0,
    "ObjectsDeleted": 0,
    "BytesDeleted": 0,
    "ObjectsSuccess": 0,
    "BytesSuccess": 0,
    "ObjectsFailed": 2,
    "BytesFailed": 16384,
    "State": "Success",
    "CreatedUtc": "2024-10-25T22:29:31.000000Z",
    "StartUtc": "2024-10-25T22:29:31.000000Z",
    "StartEnumerationUtc": "2024-10-25T22:29:31.000000Z",
    "StartRetrievalUtc": "2024-10-25T22:29:39.000000Z",
    "FinishEnumerationUtc": "2024-10-25T22:29:39.000000Z",
    "FinishRetrievalUtc": "2024-10-25T22:29:39.000000Z",
    "FinishUtc": "2024-10-25T22:29:39.000000Z"
}
curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/00000000-0000-0000-0000-000000000000' \
--header 'Authorization: ••••••'
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.retrieveCrawlOperation() with a hard-coded GUID and logs the
 * response. Any error raised inside the try block is caught and logged instead.
 */
async function readCrawlOperation() {
  // Example GUID of the crawl operation to read.
  const crawlOperationGuid = "143fc146-dc60-4264-99b8-49bb816d356f";
  try {
    const result = await crawler.retrieveCrawlOperation(crawlOperationGuid);
    console.log(result, "Crawl operation fetched successfully");
  } catch (error) {
    console.log("Error fetching Crawl operation:", error);
  }
}

readCrawlOperation();

Note: the HEAD method can be used as an alternative to GET to simply check the existence of the object. HEAD requests return either a 200/OK in the event the object exists, or a 404/Not Found if not. No response body is returned with a HEAD request.

Retrieve enumeration

To read the enumeration of a crawl operation by GUID, call GET /v1.0/tenants/[tenant-guid]/crawloperations/[crawloperation-guid]/enumeration. If the object exists, it will be returned as a JSON object in the response body. If it does not exist, a 404 will be returned with a NotFound error response.

curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/78d881af-ad82-48bc-8097-1fccd2787624/enumeration' \
--header 'Authorization: ••••••'
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.retrieveEnumerationCrawlOperations() with a hard-coded GUID
 * and logs the response. Any error raised inside the try block is caught and
 * logged instead.
 */
async function retrieveEnumerationCrawlOperations() {
  // Example GUID of the crawl operation whose enumeration is requested.
  const crawlOperationGuid = "143fc146-dc60-4264-99b8-49bb816d356f";
  try {
    const result = await crawler.retrieveEnumerationCrawlOperations(
      crawlOperationGuid
    );
    console.log(result, "Crawl operation enumeration fetched successfully");
  } catch (error) {
    console.log("Error fetching Crawl operation enumeration:", error);
  }
}

retrieveEnumerationCrawlOperations();

Read all

To read all objects, call GET /v1.0/tenants/[tenant-guid]/crawloperations/. If objects exist, they will be returned as an array of JSON objects in the response body.

curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/' \
--header 'Authorization: ••••••'
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.retrieveCrawlOperations() and logs the response.
 * Any error raised inside the try block is caught and logged instead.
 */
async function readAllCrawlOperations() {
  try {
    const result = await crawler.retrieveCrawlOperations();
    console.log(result, "All crawl operations fetched successfully");
  } catch (error) {
    console.log("Error fetching All crawl operations:", error);
  }
}

readAllCrawlOperations();

{
    "GUID": "4292118d-3397-4090-88c6-90f1886a3e35",
    "TenantGUID": "default",
    "DataRepositoryGUID": "c854f5f2-68f6-44c4-813e-9c1dea51676a",
    "CrawlScheduleGUID": "oneminute",
    "CrawlFilterGUID": "default",
    "MetadataRuleGUID": "example-metadata-rule",
    "EmbeddingsRuleGUID": "crawler-embeddings-rule",
    "Name": "My updated local files",
    "EnumerationDirectory": "./enumerations/",
    "EnumerationsToRetain": 16,
    "MaxDrainTasks": 4,
    "ProcessAdditions": true,
    "ProcessDeletions": true,
    "ProcessUpdates": true,
    "CreatedUtc": "2024-10-23T15:14:26.000000Z"
}

Start

To start a crawl operation by GUID, call POST /v1.0/tenants/[tenant-guid]/crawloperations/[crawloperation-guid]/start.

curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/00000000-0000-0000-0000-000000000000/start' \
--header 'content-type: application/json' \
--header 'Authorization: ••••••' \
--data '{
    "Name": "My tenant"
}'
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.startCrawlOperation() with a hard-coded GUID and an object
 * carrying a Name, then logs the response. Any error raised inside the try
 * block is caught and logged instead.
 */
async function startCrawlOperation() {
  // Example GUID of the crawl operation to start.
  const crawlOperationGuid = "143fc146-dc60-4264-99b8-49bb816d356f";
  // Second argument passed through to the SDK call.
  const payload = {
    Name: "My crawl operation [ASH]",
  };
  try {
    const result = await crawler.startCrawlOperation(
      crawlOperationGuid,
      payload
    );
    console.log(result, "Crawl operation started successfully");
  } catch (error) {
    console.log("Error starting Crawl operation:", error);
  }
}

startCrawlOperation();

Stop

To stop a crawl operation by GUID, call POST /v1.0/tenants/[tenant-guid]/crawloperations/[crawloperation-guid]/stop.

curl --location 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/00000000-0000-0000-0000-000000000000/stop' \
--header 'content-type: application/json' \
--header 'Authorization: ••••••' \
--data '{
    "Name": "My tenant"
}'
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.stopCrawlOperation() with a hard-coded GUID and an object
 * carrying a Name, then logs the response. Any error raised inside the try
 * block is caught and logged instead.
 */
async function stopCrawlOperation() {
  // Example GUID of the crawl operation to stop.
  const crawlOperationGuid = "143fc146-dc60-4264-99b8-49bb816d356f";
  // Second argument passed through to the SDK call.
  const payload = {
    Name: "My crawl operation [ASH]",
  };
  try {
    const result = await crawler.stopCrawlOperation(
      crawlOperationGuid,
      payload
    );
    console.log(result, "Crawl operation stopped successfully");
  } catch (error) {
    console.log("Error stopping Crawl operation:", error);
  }
}

stopCrawlOperation();

Delete

To delete an object by GUID, call DELETE /v1.0/tenants/[tenant-guid]/crawloperations/[crawloperation-guid].

curl --location --request DELETE 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/00000000-0000-0000-0000-000000000000' \
--header 'Authorization: ••••••' \
--data ''
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.deleteCrawlOperation() with a hard-coded GUID and logs the
 * response. Any error raised inside the try block is caught and logged instead.
 */
async function deleteCrawlOperation() {
  // Example GUID of the crawl operation to delete.
  const crawlOperationGuid = "143fc146-dc60-4264-99b8-49bb816d356f";
  try {
    const result = await crawler.deleteCrawlOperation(crawlOperationGuid);
    console.log(result, "Crawl operation deleted successfully");
  } catch (error) {
    console.log("Error deleting Crawl operation:", error);
  }
}

deleteCrawlOperation();

Check Existence

To check whether an object exists, call HEAD /v1.0/tenants/[tenant-guid]/crawloperations/[crawloperation-guid].

curl --location --head 'http://view.homedns.org:8000/v1.0/tenants/00000000-0000-0000-0000-000000000000/crawloperations/00000000-0000-0000-0000-000000000000' \
--header 'Authorization: ••••••'
import { ViewCrawlerSdk } from "view-sdk";

// SDK client used by the example below.
const crawler = new ViewCrawlerSdk(
  "default", // tenant ID
  "default", // access token
  "http://localhost:8000/" // endpoint
);

/**
 * Calls crawler.existsCrawlOperation() with a hard-coded GUID and logs the
 * response. Any error raised inside the try block is caught and logged instead.
 */
async function existsCrawlOperation() {
  // Example GUID of the crawl operation to check.
  const crawlOperationGuid = "143fc146-dc60-4264-99b8-49bb816d356f";
  try {
    const result = await crawler.existsCrawlOperation(crawlOperationGuid);
    // NOTE(review): this message is logged whatever value is returned —
    // confirm what the SDK resolves to when the object does not exist.
    console.log(result, "Crawl operation exists");
  } catch (error) {
    console.log("Error checking Crawl operation:", error);
  }
}

existsCrawlOperation();