Reranker and Ollama Embeddings - Weaviate Schema

The following schema describes our standard mapping with a reranker and with embeddings calculated on the Weaviate side via Ollama.

Before creating the index, make sure to fill in the Ollama apiEndpoint and model values, which are marked "FILL OUT" in the text2vec-ollama vectorizer section of the schema.

To use this schema, do not include our embedding transformer stage (see Vectorizer and Embeddings) in the connector’s content transformation pipeline.

{
  "class": "__DOCUMENT_INDEX_NAME__",
  "name": "__DOCUMENT_INDEX_NAME__",
  "vectorConfig": {
    "default": {
      "vectorIndexConfig": {
        "bq": {
          "enabled": false
        },
        "cleanupIntervalSeconds": 300,
        "distance": "cosine",
        "dynamicEfFactor": 8,
        "dynamicEfMax": 500,
        "dynamicEfMin": 100,
        "ef": -1,
        "efConstruction": 128,
        "filterStrategy": "acorn",
        "flatSearchCutoff": 40000,
        "maxConnections": 32,
        "multivector": {
          "aggregation": "maxSim",
          "enabled": false,
          "muvera": {
            "dprojections": 16,
            "enabled": false,
            "ksim": 4,
            "repetitions": 10
          }
        },
        "pq": {
          "bitCompression": false,
          "centroids": 256,
          "enabled": false,
          "encoder": {
            "distribution": "log-normal",
            "type": "kmeans"
          },
          "segments": 0,
          "trainingLimit": 100000
        },
        "rq": {
          "bits": 8,
          "enabled": false,
          "rescoreLimit": 20
        },
        "skip": false,
        "skipDefaultQuantization": false,
        "sq": {
          "enabled": false,
          "rescoreLimit": 20,
          "trainingLimit": 100000
        },
        "trackDefaultQuantization": false,
        "vectorCacheMaxObjects": 1000000000000
      },
      "vectorIndexType": "hnsw",
      "vectorizer": {
        "text2vec-ollama": {
          "apiEndpoint": "FILL OUT",
          "model": "FILL OUT",
          "vectorizeClassName": false
        }
      }
    }
  },
  "moduleConfig": {
    "reranker-nvidia": {}
  },
  "properties": [
    {
      "dataType": [
        "text[]"
      ],
      "name": "allow_acl",
      "indexFilterable": true,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "deny_acl",
      "indexFilterable": true,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "authors",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "authorsMail",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "body",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word",
      "moduleConfig": {
        "text2vec-ollama": {
          "skip": false,
          "vectorizePropertyName": false
        }
      }
    },
    {
      "dataType": [
        "text"
      ],
      "name": "body_de",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "body_fr",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "body_es",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "body_it",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "fileExtension",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "lowercase"
    },
    {
      "dataType": [
        "date"
      ],
      "name": "createdDate",
      "indexFilterable": true
    },
    {
      "dataType": [
        "text"
      ],
      "name": "iconSvg",
      "indexFilterable": false,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "iconUrl",
      "indexFilterable": false,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "internalId",
      "indexFilterable": false,
      "indexSearchable": true,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "itemType",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "lowercase"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "keywords",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "documentTopic",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "documentScope",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "documentCategory",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "persons",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "organizations",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "account",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "date"
      ],
      "name": "lastModifiedDate",
      "indexFilterable": true
    },
    {
      "dataType": [
        "text[]"
      ],
      "name": "originalItemType",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "lowercase"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "originalId",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "parentItemTitle",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "parentItemUrl",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "rootItemTitle",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "title",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word",
      "moduleConfig": {
        "text2vec-ollama": {
          "skip": false,
          "vectorizePropertyName": false
        }
      }
    },
    {
      "dataType": [
        "text"
      ],
      "name": "rootItemUrl",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "title_de",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "title_fr",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "title_es",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "title_it",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "word"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "url",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "field"
    },
    {
      "dataType": [
        "text"
      ],
      "name": "embedding",
      "indexFilterable": true,
      "indexSearchable": true,
      "tokenization": "field"
    }
  ]
}