Skip to content

Commit a38d81e

Browse files
authored
SOLR-13309: Introduce FloatRangeField to expose Lucene 'FloatRange' (#4229)
Mirrors recently added 'IntRangeField' and 'LongRangeField' types.
1 parent a364ac7 commit a38d81e

9 files changed

Lines changed: 1213 additions & 9 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
title: Introduce new `FloatRangeField` field type for storing and querying float-based ranges
2+
type: added
3+
authors:
4+
- name: Jason Gerlowski
5+
links:
6+
- name: SOLR-13309
7+
url: https://issues.apache.org/jira/browse/SOLR-13309

solr/core/src/java/org/apache/solr/schema/numericrange/AbstractNumericRangeField.java

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
*
5454
* @see IntRangeField
5555
* @see LongRangeField
56+
* @see FloatRangeField
5657
*/
5758
public abstract class AbstractNumericRangeField extends PrimitiveFieldType {
5859

@@ -82,9 +83,55 @@ public interface NumericRangeValue {
8283
protected static final Pattern SINGLE_BOUND_PATTERN =
8384
Pattern.compile("^" + COMMA_DELIMITED_NUMS + "$");
8485

86+
/**
87+
* Regex fragment matching a comma-separated list of signed floating-point numbers (integers,
88+
* floating-point literals, or values in scientific notation such as {@code 1.2e3} or {@code
89+
* -4.5E-6}).
90+
*/
91+
protected static final String COMMA_DELIMITED_FP_NUMS =
92+
"-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?(?:\\s*,\\s*-?\\d+(?:\\.\\d+)?(?:[eE][+-]?\\d+)?)*";
93+
94+
private static final String FP_RANGE_PATTERN_STR =
95+
"\\[\\s*(" + COMMA_DELIMITED_FP_NUMS + ")\\s+TO\\s+(" + COMMA_DELIMITED_FP_NUMS + ")\\s*\\]";
96+
97+
/**
98+
* Pre-compiled pattern matching {@code [min1,min2,... TO max1,max2,...]} range syntax where
99+
* values may be floating-point numbers.
100+
*/
101+
protected static final Pattern FP_RANGE_PATTERN_REGEX = Pattern.compile(FP_RANGE_PATTERN_STR);
102+
103+
/**
104+
* Pre-compiled pattern matching a single (multi-dimensional) floating-point bound, e.g. {@code
105+
* 1.5,2.0,3.14}.
106+
*/
107+
protected static final Pattern FP_SINGLE_BOUND_PATTERN =
108+
Pattern.compile("^" + COMMA_DELIMITED_FP_NUMS + "$");
109+
85110
/** Configured number of dimensions for this field type; defaults to 1. */
86111
protected int numDimensions = 1;
87112

113+
/**
114+
* Returns the regex {@link Pattern} used to match a full range value string of the form {@code
115+
* [min TO max]}. Subclasses may override to use an alternative pattern (e.g. one that accepts
116+
* floating-point numbers).
117+
*
118+
* @return the range pattern for this field type
119+
*/
120+
protected Pattern getRangePattern() {
121+
return RANGE_PATTERN_REGEX;
122+
}
123+
124+
/**
125+
* Returns the regex {@link Pattern} used to match a single multi-dimensional bound (e.g. {@code
126+
* 1,2,3}). Subclasses may override to use an alternative pattern (e.g. one that accepts
127+
* floating-point numbers).
128+
*
129+
* @return the single-bound pattern for this field type
130+
*/
131+
protected Pattern getSingleBoundPattern() {
132+
return SINGLE_BOUND_PATTERN;
133+
}
134+
88135
@Override
89136
protected boolean enableDocValuesByDefault() {
90137
return false; // Range fields do not support docValues
@@ -287,13 +334,13 @@ public Query getFieldQuery(QParser parser, SchemaField field, String externalVal
287334
String trimmed = externalVal.trim();
288335

289336
// Check if it's the full range syntax: [min1,min2 TO max1,max2]
290-
if (RANGE_PATTERN_REGEX.matcher(trimmed).matches()) {
337+
if (getRangePattern().matcher(trimmed).matches()) {
291338
final var rangeValue = parseRangeValue(trimmed);
292339
return newContainsQuery(field.getName(), rangeValue);
293340
}
294341

295342
// Syntax sugar: also accept a single-bound (i.e. pX,pY,pZ)
296-
if (SINGLE_BOUND_PATTERN.matcher(trimmed).matches()) {
343+
if (getSingleBoundPattern().matcher(trimmed).matches()) {
297344
final var singleBoundRange = parseSingleBound(trimmed);
298345

299346
if (singleBoundRange.getDimensions() != numDimensions) {
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.schema.numericrange;
18+
19+
import java.util.regex.Matcher;
20+
import java.util.regex.Pattern;
21+
import org.apache.lucene.document.FloatRange;
22+
import org.apache.lucene.index.IndexableField;
23+
import org.apache.lucene.search.Query;
24+
import org.apache.solr.common.SolrException;
25+
import org.apache.solr.common.SolrException.ErrorCode;
26+
import org.apache.solr.schema.SchemaField;
27+
import org.apache.solr.search.QParser;
28+
29+
/**
30+
* Field type for float ranges with support for 1-4 dimensions.
31+
*
32+
* <p>This field type wraps Lucene's {@link FloatRange} to provide storage and querying of float
33+
* range values. Ranges can be 1-dimensional (simple ranges), 2-dimensional (bounding boxes),
34+
* 3-dimensional (bounding cubes), or 4-dimensional (tesseracts).
35+
*
36+
* <h2>Value Format</h2>
37+
*
38+
* Values are specified using bracket notation with a TO keyword separator:
39+
*
40+
* <ul>
41+
* <li>1D: {@code [1.5 TO 2.5]}
42+
* <li>2D: {@code [1.0,2.0 TO 3.0,4.0]}
43+
* <li>3D: {@code [1.0,2.0,3.0 TO 4.0,5.0,6.0]}
44+
* <li>4D: {@code [1.0,2.0,3.0,4.0 TO 5.0,6.0,7.0,8.0]}
45+
* </ul>
46+
*
47+
* As the name suggests, minimum values (those on the left) must always be less than or equal to the
48+
* maximum value for the corresponding dimension. Integer values (e.g. {@code [10 TO 20]}) are also
49+
* accepted and parsed as floats.
50+
*
51+
* <h2>Schema Configuration</h2>
52+
*
53+
* <pre>
54+
* &lt;fieldType name="floatrange" class="org.apache.solr.schema.numericrange.FloatRangeField" numDimensions="1"/&gt;
55+
* &lt;fieldType name="floatrange2d" class="org.apache.solr.schema.numericrange.FloatRangeField" numDimensions="2"/&gt;
56+
* &lt;field name="price_range" type="floatrange" indexed="true" stored="true"/&gt;
57+
* &lt;field name="bbox" type="floatrange2d" indexed="true" stored="true"/&gt;
58+
* </pre>
59+
*
60+
* <h2>Querying</h2>
61+
*
62+
* Use the {@code numericRange} query parser for range queries with support for different query
63+
* types:
64+
*
65+
* <ul>
66+
* <li>Intersects: {@code {!numericRange criteria="intersects" field=price_range}[1.0 TO 2.0]}
67+
* <li>Within: {@code {!numericRange criteria="within" field=price_range}[0.0 TO 3.0]}
68+
* <li>Contains: {@code {!numericRange criteria="contains" field=price_range}[1.5 TO 1.75]}
69+
* <li>Crosses: {@code {!numericRange criteria="crosses" field=price_range}[1.5 TO 2.5]}
70+
* </ul>
71+
*
72+
* <h2>Limitations</h2>
73+
*
74+
* The main limitation of this field type is that it doesn't support docValues or uninversion, and
75+
* therefore can't be used for sorting, faceting, etc.
76+
*
77+
* @see FloatRange
78+
* @see org.apache.solr.search.numericrange.NumericRangeQParserPlugin
79+
*/
80+
public class FloatRangeField extends AbstractNumericRangeField {
81+
82+
@Override
83+
protected Pattern getRangePattern() {
84+
return FP_RANGE_PATTERN_REGEX;
85+
}
86+
87+
@Override
88+
protected Pattern getSingleBoundPattern() {
89+
return FP_SINGLE_BOUND_PATTERN;
90+
}
91+
92+
@Override
93+
public IndexableField createField(SchemaField field, Object value) {
94+
if (!field.indexed() && !field.stored()) {
95+
return null;
96+
}
97+
98+
String valueStr = value.toString();
99+
RangeValue rangeValue = parseRangeValue(valueStr);
100+
101+
return new FloatRange(field.getName(), rangeValue.mins, rangeValue.maxs);
102+
}
103+
104+
/**
105+
* Parse a range value string into a RangeValue object.
106+
*
107+
* @param value the string value in format "[min1,min2,... TO max1,max2,...]"
108+
* @return parsed RangeValue
109+
* @throws SolrException if value format is invalid
110+
*/
111+
@Override
112+
public RangeValue parseRangeValue(String value) {
113+
if (value == null || value.trim().isEmpty()) {
114+
throw new SolrException(ErrorCode.BAD_REQUEST, "Range value cannot be null or empty");
115+
}
116+
117+
Matcher matcher = FP_RANGE_PATTERN_REGEX.matcher(value.trim());
118+
if (!matcher.matches()) {
119+
throw new SolrException(
120+
ErrorCode.BAD_REQUEST,
121+
"Invalid range format. Expected: [min1,min2,... TO max1,max2,...] where min and max values are floats, but got: "
122+
+ value);
123+
}
124+
125+
String minPart = matcher.group(1).trim();
126+
String maxPart = matcher.group(2).trim();
127+
128+
float[] mins = parseFloatArray(minPart, "min values");
129+
float[] maxs = parseFloatArray(maxPart, "max values");
130+
131+
if (mins.length != maxs.length) {
132+
throw new SolrException(
133+
ErrorCode.BAD_REQUEST,
134+
"Min and max dimensions must match. Min dimensions: "
135+
+ mins.length
136+
+ ", max dimensions: "
137+
+ maxs.length);
138+
}
139+
140+
if (mins.length != numDimensions) {
141+
throw new SolrException(
142+
ErrorCode.BAD_REQUEST,
143+
"Range dimensions ("
144+
+ mins.length
145+
+ ") do not match field type numDimensions ("
146+
+ numDimensions
147+
+ ")");
148+
}
149+
150+
// Validate that min <= max for each dimension
151+
for (int i = 0; i < mins.length; i++) {
152+
if (mins[i] > maxs[i]) {
153+
throw new SolrException(
154+
ErrorCode.BAD_REQUEST,
155+
"Min value must be <= max value for dimension "
156+
+ i
157+
+ ". Min: "
158+
+ mins[i]
159+
+ ", Max: "
160+
+ maxs[i]);
161+
}
162+
}
163+
164+
return new RangeValue(mins, maxs);
165+
}
166+
167+
@Override
168+
public NumericRangeValue parseSingleBound(String value) {
169+
final var singleBoundTyped = parseFloatArray(value, "single bound values");
170+
return new RangeValue(singleBoundTyped, singleBoundTyped);
171+
}
172+
173+
/**
174+
* Parse a comma-separated string of floats into an array.
175+
*
176+
* @param str the string to parse
177+
* @param description description for error messages
178+
* @return array of parsed floats
179+
*/
180+
private float[] parseFloatArray(String str, String description) {
181+
String[] parts = str.split(",");
182+
float[] result = new float[parts.length];
183+
184+
for (int i = 0; i < parts.length; i++) {
185+
try {
186+
result[i] = Float.parseFloat(parts[i].trim());
187+
} catch (NumberFormatException e) {
188+
throw new SolrException(
189+
ErrorCode.BAD_REQUEST,
190+
"Invalid float in " + description + ": '" + parts[i].trim() + "'",
191+
e);
192+
}
193+
}
194+
195+
return result;
196+
}
197+
198+
@Override
199+
public Query newContainsQuery(String fieldName, NumericRangeValue rangeValue) {
200+
final var rv = (RangeValue) rangeValue;
201+
return FloatRange.newContainsQuery(fieldName, rv.mins, rv.maxs);
202+
}
203+
204+
@Override
205+
public Query newIntersectsQuery(String fieldName, NumericRangeValue rangeValue) {
206+
final var rv = (RangeValue) rangeValue;
207+
return FloatRange.newIntersectsQuery(fieldName, rv.mins, rv.maxs);
208+
}
209+
210+
@Override
211+
public Query newWithinQuery(String fieldName, NumericRangeValue rangeValue) {
212+
final var rv = (RangeValue) rangeValue;
213+
return FloatRange.newWithinQuery(fieldName, rv.mins, rv.maxs);
214+
}
215+
216+
@Override
217+
public Query newCrossesQuery(String fieldName, NumericRangeValue rangeValue) {
218+
final var rv = (RangeValue) rangeValue;
219+
return FloatRange.newCrossesQuery(fieldName, rv.mins, rv.maxs);
220+
}
221+
222+
@Override
223+
protected Query getSpecializedRangeQuery(
224+
QParser parser,
225+
SchemaField field,
226+
String part1,
227+
String part2,
228+
boolean minInclusive,
229+
boolean maxInclusive) {
230+
// For standard range syntax field:[value TO value], default to contains query
231+
if (part1 == null || part2 == null) {
232+
return super.getSpecializedRangeQuery(
233+
parser, field, part1, part2, minInclusive, maxInclusive);
234+
}
235+
236+
// Parse the range bounds as single-dimensional float values
237+
float min, max;
238+
try {
239+
min = Float.parseFloat(part1.trim());
240+
max = Float.parseFloat(part2.trim());
241+
} catch (NumberFormatException e) {
242+
throw new SolrException(
243+
ErrorCode.BAD_REQUEST,
244+
"Invalid float values in range query: [" + part1 + " TO " + part2 + "]",
245+
e);
246+
}
247+
248+
// For exclusive bounds, step to the next representable float value
249+
if (!minInclusive) {
250+
min = Math.nextUp(min);
251+
}
252+
if (!maxInclusive) {
253+
max = Math.nextDown(max);
254+
}
255+
256+
// Build arrays for the query based on configured dimensions
257+
float[] mins = new float[numDimensions];
258+
float[] maxs = new float[numDimensions];
259+
260+
// For now, only support 1D range syntax with field:[X TO Y]
261+
if (numDimensions == 1) {
262+
mins[0] = min;
263+
maxs[0] = max;
264+
return FloatRange.newContainsQuery(field.getName(), mins, maxs);
265+
} else {
266+
throw new SolrException(
267+
ErrorCode.BAD_REQUEST,
268+
"Standard range query syntax only supports 1D ranges. "
269+
+ "Use {!numericRange ...} for multi-dimensional queries.");
270+
}
271+
}
272+
273+
/** Simple holder class for parsed float range values. */
274+
public static class RangeValue implements AbstractNumericRangeField.NumericRangeValue {
275+
public final float[] mins;
276+
public final float[] maxs;
277+
278+
public RangeValue(float[] mins, float[] maxs) {
279+
this.mins = mins;
280+
this.maxs = maxs;
281+
}
282+
283+
@Override
284+
public int getDimensions() {
285+
return mins.length;
286+
}
287+
288+
@Override
289+
public String toString() {
290+
StringBuilder sb = new StringBuilder("[");
291+
for (int i = 0; i < mins.length; i++) {
292+
if (i > 0) sb.append(",");
293+
sb.append(mins[i]);
294+
}
295+
sb.append(" TO ");
296+
for (int i = 0; i < maxs.length; i++) {
297+
if (i > 0) sb.append(",");
298+
sb.append(maxs[i]);
299+
}
300+
sb.append("]");
301+
return sb.toString();
302+
}
303+
}
304+
}

0 commit comments

Comments
 (0)