Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
8957d61
add spfresh to python client
robbespo00 Oct 16, 2025
f1abc27
add enum for centroids index type in spfresh
robbespo00 Oct 17, 2025
9518b8f
Add missing init parameter for spfresh
rlmanrique Nov 17, 2025
896c333
Fix spfresh params in config of the class
rlmanrique Nov 17, 2025
59a1317
Add missing multivector param for spfresh
rlmanrique Nov 17, 2025
f07d50d
Fix multivector
rlmanrique Nov 18, 2025
2bcd33c
Fix update spfresh method
rlmanrique Nov 24, 2025
c188305
remove centroids index type
robbespo00 Nov 28, 2025
fdefaa4
add spfresh to python client
robbespo00 Oct 16, 2025
ffc98b3
add enum for centroids index type in spfresh
robbespo00 Oct 17, 2025
1059b43
Add missing init parameter for spfresh
rlmanrique Nov 17, 2025
2cdc5e8
Fix spfresh params in config of the class
rlmanrique Nov 17, 2025
e8847a8
Add missing multivector param for spfresh
rlmanrique Nov 17, 2025
e735a58
Fix multivector
rlmanrique Nov 18, 2025
88c276c
Fix update spfresh method
rlmanrique Nov 24, 2025
6d6afeb
remove centroids index type
robbespo00 Nov 28, 2025
fb49f84
Rename spfresh to hfresh
rlmanrique Dec 17, 2025
e37ada4
Merge branch 'rob/spfresh' of https://github.com/weaviate/weaviate-py…
robbespo00 Dec 17, 2025
f46fc4a
remove centroids index type
robbespo00 Dec 17, 2025
09ab4bb
Merge remote-tracking branch 'origin/main' into rob/spfresh
robbespo00 Jan 13, 2026
545168c
use max posting size kb
robbespo00 Jan 13, 2026
b24f14c
Merge remote-tracking branch 'origin/main' into rob/spfresh
robbespo00 Jan 16, 2026
d4baec8
remove rng factor
robbespo00 Jan 16, 2026
0252f7f
run ruff linter and formatting
robbespo00 Jan 23, 2026
01b3158
add D417
robbespo00 Jan 23, 2026
e4a1273
remove distance metric
robbespo00 Jan 23, 2026
9f6f5c2
Merge remote-tracking branch 'origin/main' into rob/spfresh
robbespo00 Jan 23, 2026
68fb441
Merge branch 'main' into rob/spfresh
rlmanrique Jan 29, 2026
83076f8
Merge branch 'main' into rob/spfresh
rlmanrique Feb 10, 2026
de95a02
Fix behaviour when empty vectorizer
rlmanrique Feb 10, 2026
c0be14f
Add test for it
rlmanrique Feb 10, 2026
5ca8347
Merge pull request #1939 from weaviate/fix/empty_vectorizer_named_vector
rlmanrique Feb 10, 2026
8d90ee5
Merge branch 'main' into rob/spfresh
rlmanrique Feb 10, 2026
268aae2
Add test for HFresh collection creation
rlmanrique Feb 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions integration/test_collection_hfresh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pytest
import weaviate
import weaviate.exceptions
from integration.conftest import CollectionFactory
from weaviate.collections.classes.config import (
Configure,
VectorDistances,
VectorIndexType,
_VectorIndexConfigHFresh,
)

def test_collection_config_hfresh(collection_factory: CollectionFactory) -> None:
    """Create a collection with an HFresh vector index and check the config read back from it."""
    # A throwaway collection is only needed to reach the connection and its server version.
    probe = collection_factory("dummy")
    server_version = probe._connection._weaviate_version
    if server_version.is_lower_than(1, 36, 0):
        pytest.skip("Hfresh index is not supported in Weaviate versions lower than 1.36.0")

    hfresh_index = Configure.VectorIndex.hfresh(
        distance_metric=VectorDistances.COSINE,
        max_posting_size_kb=1024,
        replicas=2,
        search_probe=50,
    )
    created = collection_factory(vector_index_config=hfresh_index)

    fetched = created.config.get()
    assert fetched.vector_index_type == VectorIndexType.HFRESH

    # Every value passed to the factory above must come back unchanged.
    index_config = fetched.vector_index_config
    assert isinstance(index_config, _VectorIndexConfigHFresh)
    assert index_config.distance_metric == VectorDistances.COSINE
    assert index_config.max_posting_size_kb == 1024
    assert index_config.replicas == 2
    assert index_config.search_probe == 50
73 changes: 73 additions & 0 deletions test/collection/test_config_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@

import pytest
from weaviate.collections.classes.config_methods import _collection_configs_simple_from_json

def test_collection_config_simple_from_json_with_none_vectorizer_config() -> None:
    """Check that a named vector whose vectorizer module config is ``None`` still parses."""

    def hnsw_settings() -> dict:
        # A new dict per call: the class-level and the named-vector index config
        # carry the same values but must not be the same object.
        return {
            "skip": False,
            "cleanupIntervalSeconds": 300,
            "maxConnections": 64,
            "efConstruction": 128,
            "ef": -1,
            "dynamicEfMin": 100,
            "dynamicEfMax": 500,
            "dynamicEfFactor": 8,
            "vectorCacheMaxObjects": 1000000000000,
            "flatSearchCutoff": 40000,
            "distance": "cosine",
        }

    named_vectors = {
        "default": {
            # The case under test: the module name is present, its config is None.
            "vectorizer": {"text2vec-transformers": None},
            "vectorIndexType": "hnsw",
            "vectorIndexConfig": hnsw_settings(),
        }
    }
    inverted_index = {
        "bm25": {"b": 0.75, "k1": 1.2},
        "cleanupIntervalSeconds": 60,
        "stopwords": {"preset": "en", "additions": None, "removals": None},
    }
    sharding = {
        "virtualPerPhysical": 128,
        "desiredCount": 1,
        "actualCount": 1,
        "desiredVirtualCount": 128,
        "actualVirtualCount": 128,
        "key": "_id",
        "strategy": "hash",
        "function": "murmur3",
    }
    test_class = {
        "class": "TestCollection",
        "vectorConfig": named_vectors,
        "properties": [],
        "invertedIndexConfig": inverted_index,
        "replicationConfig": {"factor": 1, "deletionStrategy": "NoAutomatedResolution"},
        "shardingConfig": sharding,
        "vectorIndexType": "hnsw",
        "vectorIndexConfig": hnsw_settings(),
    }

    parsed = _collection_configs_simple_from_json({"classes": [test_class]})
    assert "TestCollection" in parsed

    named = parsed["TestCollection"].vector_config
    assert named is not None
    assert "default" in named

    default_vectorizer = named["default"].vectorizer
    assert default_vectorizer.model == {}
    assert default_vectorizer.source_properties is None
46 changes: 44 additions & 2 deletions weaviate/collections/classes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
_VectorIndexConfigDynamicUpdate,
_VectorIndexConfigFlatUpdate,
_VectorIndexConfigHNSWUpdate,
_VectorIndexConfigHFreshUpdate,
_VectorIndexConfigUpdate,
)
from weaviate.collections.classes.config_vector_index import (
Expand Down Expand Up @@ -1846,6 +1847,21 @@ def vector_index_type() -> str:
VectorIndexConfigHNSW = _VectorIndexConfigHNSW


@dataclass
class _VectorIndexConfigHFresh(_VectorIndexConfig):
    # Read-side view of an HFresh vector index configuration.
    # The four fields below are filled from the schema keys `distance`,
    # `maxPostingSizeKB`, `replicas` and `searchProbe` respectively.
    distance_metric: VectorDistances
    max_posting_size_kb: int
    replicas: int
    search_probe: int

    @staticmethod
    def vector_index_type() -> str:
        # The enum member's string value is returned, not the member itself.
        hfresh_member = VectorIndexType.HFRESH
        return hfresh_member.value


VectorIndexConfigHFresh = _VectorIndexConfigHFresh


@dataclass
class _VectorIndexConfigFlat(_VectorIndexConfig):
distance_metric: VectorDistances
Expand Down Expand Up @@ -1919,7 +1935,10 @@ def to_dict(self) -> Dict[str, Any]:
class _NamedVectorConfig(_ConfigBase):
vectorizer: _NamedVectorizerConfig
vector_index_config: Union[
VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic
VectorIndexConfigHNSW,
VectorIndexConfigFlat,
VectorIndexConfigDynamic,
VectorIndexConfigHFresh,
]

def to_dict(self) -> Dict:
Expand Down Expand Up @@ -1956,7 +1975,11 @@ class _CollectionConfig(_ConfigBase):
reranker_config: Optional[RerankerConfig]
sharding_config: Optional[ShardingConfig]
vector_index_config: Union[
VectorIndexConfigHNSW, VectorIndexConfigFlat, VectorIndexConfigDynamic, None
VectorIndexConfigHNSW,
VectorIndexConfigFlat,
VectorIndexConfigDynamic,
VectorIndexConfigHFresh,
None,
]
vector_index_type: Optional[VectorIndexType]
vectorizer_config: Optional[VectorizerConfig]
Expand Down Expand Up @@ -2610,6 +2633,25 @@ def dynamic(
quantizer=quantizer,
)

@staticmethod
def hfresh(
    max_posting_size_kb: Optional[int] = None,
    search_probe: Optional[int] = None,
    quantizer: Optional[_RQConfigUpdate] = None,
) -> _VectorIndexConfigHFreshUpdate:
    """Create a `_VectorIndexConfigHFreshUpdate` object to update the configuration of the HFresh vector index.

    Use this method when defining the `vector_index_config` argument in `collection.update()`.

    See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hfresh) for a more detailed view!

    Args:
        max_posting_size_kb: Value for the index's `maxPostingSizeKB` setting; forwarded as given, including `None`.
        search_probe: Value for the index's `searchProbe` setting; forwarded as given, including `None`.
        quantizer: RQ quantizer update configuration; forwarded as given, including `None`.

    Returns:
        The update object carrying the three values above.
    """
    return _VectorIndexConfigHFreshUpdate(
        maxPostingSizeKB=max_posting_size_kb,
        searchProbe=search_probe,
        quantizer=quantizer,
    )


class Reconfigure:
"""Use this factory class to generate the correct `xxxConfig` object for use when using the `collection.update()` method.
Expand Down
25 changes: 24 additions & 1 deletion weaviate/collections/classes/config_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
_VectorIndexConfigDynamic,
_VectorIndexConfigFlat,
_VectorIndexConfigHNSW,
_VectorIndexConfigHFresh,
_VectorizerConfig,
)

Expand Down Expand Up @@ -213,6 +214,18 @@ def __get_hnsw_config(config: Dict[str, Any]) -> _VectorIndexConfigHNSW:
)


def __get_hfresh_config(config: Dict[str, Any]) -> _VectorIndexConfigHFresh:
    """Build the read-side HFresh index config from a `vectorIndexConfig` JSON dict.

    Args:
        config: The index settings. `maxPostingSizeKB`, `replicas` and `searchProbe`
            are read with plain indexing and so must be present; `distance` is read
            with `.get()`.

    Returns:
        The populated `_VectorIndexConfigHFresh`.

    Raises:
        KeyError: If `maxPostingSizeKB`, `replicas` or `searchProbe` is missing.
    """
    # The quantizer is resolved first, before any other key is read from `config`.
    quantizer_config = __get_quantizer_config(config)
    # NOTE(review): presumably raises ValueError when `distance` is absent or unknown — confirm.
    metric = VectorDistances(config.get("distance"))
    posting_size_kb = config["maxPostingSizeKB"]
    replica_count = config["replicas"]
    probe = config["searchProbe"]
    # NOTE(review): multi_vector is always None here although the create-side
    # `hfresh(...)` factory accepts a multi-vector config — confirm this is intended.
    return _VectorIndexConfigHFresh(
        multi_vector=None,
        quantizer=quantizer_config,
        distance_metric=metric,
        max_posting_size_kb=posting_size_kb,
        replicas=replica_count,
        search_probe=probe,
    )


def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat:
quantizer = __get_quantizer_config(config)
return _VectorIndexConfigFlat(
Expand All @@ -225,7 +238,13 @@ def __get_flat_config(config: Dict[str, Any]) -> _VectorIndexConfigFlat:

def __get_vector_index_config(
schema: Dict[str, Any],
) -> Union[_VectorIndexConfigHNSW, _VectorIndexConfigFlat, _VectorIndexConfigDynamic, None]:
) -> Union[
_VectorIndexConfigHNSW,
_VectorIndexConfigFlat,
_VectorIndexConfigDynamic,
_VectorIndexConfigHFresh,
None,
]:
if "vectorIndexConfig" not in schema:
return None
if schema["vectorIndexType"] == "hnsw":
Expand All @@ -239,6 +258,8 @@ def __get_vector_index_config(
hnsw=__get_hnsw_config(schema["vectorIndexConfig"]["hnsw"]),
flat=__get_flat_config(schema["vectorIndexConfig"]["flat"]),
)
elif schema["vectorIndexType"] == "hfresh":
return __get_hfresh_config(schema["vectorIndexConfig"])
else:
return None

Expand All @@ -256,6 +277,8 @@ def __get_vector_config(

vectorizer_str: str = str(list(vectorizer)[0])
vec_config: Dict[str, Any] = named_vector["vectorizer"][vectorizer_str]
if vec_config is None:
vec_config = {}
props = vec_config.pop("properties", None)

vector_index_config = __get_vector_index_config(named_vector)
Expand Down
2 changes: 2 additions & 0 deletions weaviate/collections/classes/config_named_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
_VectorIndexConfigDynamicUpdate,
_VectorIndexConfigFlatUpdate,
_VectorIndexConfigHNSWUpdate,
_VectorIndexConfigHFreshUpdate,
_VectorIndexConfigUpdate,
)
from weaviate.collections.classes.config_vectorizers import (
Expand Down Expand Up @@ -1340,6 +1341,7 @@ def update(
*,
vector_index_config: Union[
_VectorIndexConfigHNSWUpdate,
_VectorIndexConfigHFreshUpdate,
_VectorIndexConfigFlatUpdate,
_VectorIndexConfigDynamicUpdate,
],
Expand Down
47 changes: 47 additions & 0 deletions weaviate/collections/classes/config_vector_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,14 @@ class VectorIndexType(str, Enum):
Attributes:
HNSW: Hierarchical Navigable Small World (HNSW) index.
FLAT: Flat index.
DYNAMIC: Dynamic index.
HFRESH: HFRESH index.
"""

HNSW = "hnsw"
FLAT = "flat"
DYNAMIC = "dynamic"
HFRESH = "hfresh"


class _MultiVectorConfigCreateBase(_ConfigCreateModel):
Expand Down Expand Up @@ -127,6 +130,16 @@ def vector_index_type() -> VectorIndexType:
return VectorIndexType.HNSW


class _VectorIndexConfigHFreshCreate(_VectorIndexConfigCreate):
    # Create-time settings specific to the HFresh index. All three are optional;
    # the field names use the camelCase spelling of the corresponding config keys.
    maxPostingSizeKB: Optional[int]
    replicas: Optional[int]
    searchProbe: Optional[int]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        # Identifies this create config as belonging to the HFresh index.
        index_type = VectorIndexType.HFRESH
        return index_type


class _VectorIndexConfigFlatCreate(_VectorIndexConfigCreate):
vectorCacheMaxObjects: Optional[int]

Expand All @@ -149,6 +162,15 @@ def vector_index_type() -> VectorIndexType:
return VectorIndexType.HNSW


class _VectorIndexConfigHFreshUpdate(_VectorIndexConfigUpdate):
    # Update-time settings specific to the HFresh index. Unlike the create config,
    # there is no `replicas` field here.
    maxPostingSizeKB: Optional[int]
    searchProbe: Optional[int]

    @staticmethod
    def vector_index_type() -> VectorIndexType:
        # Identifies this update config as belonging to the HFresh index.
        index_type = VectorIndexType.HFRESH
        return index_type


class _VectorIndexConfigFlatUpdate(_VectorIndexConfigUpdate):
vectorCacheMaxObjects: Optional[int]

Expand Down Expand Up @@ -564,6 +586,31 @@ def hnsw(
multivector=multi_vector,
)

@staticmethod
def hfresh(
    distance_metric: Optional[VectorDistances] = None,
    max_posting_size_kb: Optional[int] = None,
    replicas: Optional[int] = None,
    search_probe: Optional[int] = None,
    quantizer: Optional[_QuantizerConfigCreate] = None,
    multi_vector: Optional[_MultiVectorConfigCreate] = None,
) -> _VectorIndexConfigHFreshCreate:
    """Create a `_VectorIndexConfigHFreshCreate` object to be used when defining the HFresh vector index configuration of Weaviate.

    Use this method when defining the `vector_index_config` argument in `collections.create()`.

    See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hfresh) for a more detailed view!

    Args:
        distance_metric: Forwarded as the `distance` field.
        max_posting_size_kb: Forwarded as the `maxPostingSizeKB` field.
        replicas: Forwarded as the `replicas` field.
        search_probe: Forwarded as the `searchProbe` field.
        quantizer: Forwarded as the `quantizer` field.
        multi_vector: Forwarded as the `multivector` field.

    Returns:
        The create object carrying the values above; arguments left at `None` are forwarded as `None`.
    """
    hfresh_create = _VectorIndexConfigHFreshCreate(
        multivector=multi_vector,
        quantizer=quantizer,
        distance=distance_metric,
        maxPostingSizeKB=max_posting_size_kb,
        replicas=replicas,
        searchProbe=search_probe,
    )
    return hfresh_create

@staticmethod
def flat(
distance_metric: Optional[VectorDistances] = None,
Expand Down
14 changes: 14 additions & 0 deletions weaviate/collections/classes/config_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
_VectorIndexConfigFlatUpdate,
_VectorIndexConfigHNSWCreate,
_VectorIndexConfigHNSWUpdate,
_VectorIndexConfigHFreshCreate,
_VectorIndexConfigHFreshUpdate,
_VectorIndexConfigUpdate,
)
from weaviate.collections.classes.config_vectorizers import (
Expand Down Expand Up @@ -128,6 +130,17 @@ def __hnsw(
multivector=multivector,
)

@staticmethod
def __hfresh(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigHFreshCreate:
    """Return an HFresh create config in which only the quantizer is set by the caller."""
    # Every HFresh-specific setting is passed explicitly as None.
    hfresh_create = _VectorIndexConfigHFreshCreate(
        distance=None,
        multivector=None,
        quantizer=quantizer,
        maxPostingSizeKB=None,
        replicas=None,
        searchProbe=None,
    )
    return hfresh_create

@staticmethod
def __flat(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigFlatCreate:
return _VectorIndexConfigFlatCreate(
Expand Down Expand Up @@ -1804,6 +1817,7 @@ def update(
name: Optional[str] = None,
vector_index_config: Union[
_VectorIndexConfigHNSWUpdate,
_VectorIndexConfigHFreshUpdate,
_VectorIndexConfigFlatUpdate,
_VectorIndexConfigDynamicUpdate,
],
Expand Down
Loading
Loading