New

The executive guide to generative AI

Read more

Distance feature query

edit

Boosts the relevance score of documents closer to a provided origin date or point. For example, you can use this query to give more weight to documents closer to a certain date or location.

You can use the distance_feature query to find the nearest neighbors to a location. You can also use the query in a bool search’s should clause to add boosted relevance scores to the bool query’s scores.

Example request

edit

Index setup

edit

To use the distance_feature query, your index must include a date, date_nanos or geo_point field.

To see how you can set up an index for the distance_feature query, try the following example.

  1. Create an items index with the following field mapping:

    resp = client.indices.create(
        index="items",
        mappings={
            "properties": {
                "name": {
                    "type": "keyword"
                },
                "production_date": {
                    "type": "date"
                },
                "location": {
                    "type": "geo_point"
                }
            }
        },
    )
    print(resp)
    response = client.indices.create(
      index: 'items',
      body: {
        mappings: {
          properties: {
            name: {
              type: 'keyword'
            },
            production_date: {
              type: 'date'
            },
            location: {
              type: 'geo_point'
            }
          }
        }
      }
    )
    puts response
    const response = await client.indices.create({
      index: "items",
      mappings: {
        properties: {
          name: {
            type: "keyword",
          },
          production_date: {
            type: "date",
          },
          location: {
            type: "geo_point",
          },
        },
      },
    });
    console.log(response);
    PUT /items
    {
      "mappings": {
        "properties": {
          "name": {
            "type": "keyword"
          },
          "production_date": {
            "type": "date"
          },
          "location": {
            "type": "geo_point"
          }
        }
      }
    }
  2. Index several documents to this index.

    resp = client.index(
        index="items",
        id="1",
        refresh=True,
        document={
            "name": "chocolate",
            "production_date": "2018-02-01",
            "location": [
                -71.34,
                41.12
            ]
        },
    )
    print(resp)
    
    resp1 = client.index(
        index="items",
        id="2",
        refresh=True,
        document={
            "name": "chocolate",
            "production_date": "2018-01-01",
            "location": [
                -71.3,
                41.15
            ]
        },
    )
    print(resp1)
    
    resp2 = client.index(
        index="items",
        id="3",
        refresh=True,
        document={
            "name": "chocolate",
            "production_date": "2017-12-01",
            "location": [
                -71.3,
                41.12
            ]
        },
    )
    print(resp2)
    response = client.index(
      index: 'items',
      id: 1,
      refresh: true,
      body: {
        name: 'chocolate',
        production_date: '2018-02-01',
        location: [
          -71.34,
          41.12
        ]
      }
    )
    puts response
    
    response = client.index(
      index: 'items',
      id: 2,
      refresh: true,
      body: {
        name: 'chocolate',
        production_date: '2018-01-01',
        location: [
          -71.3,
          41.15
        ]
      }
    )
    puts response
    
    response = client.index(
      index: 'items',
      id: 3,
      refresh: true,
      body: {
        name: 'chocolate',
        production_date: '2017-12-01',
        location: [
          -71.3,
          41.12
        ]
      }
    )
    puts response
    const response = await client.index({
      index: "items",
      id: 1,
      refresh: "true",
      document: {
        name: "chocolate",
        production_date: "2018-02-01",
        location: [-71.34, 41.12],
      },
    });
    console.log(response);
    
    const response1 = await client.index({
      index: "items",
      id: 2,
      refresh: "true",
      document: {
        name: "chocolate",
        production_date: "2018-01-01",
        location: [-71.3, 41.15],
      },
    });
    console.log(response1);
    
    const response2 = await client.index({
      index: "items",
      id: 3,
      refresh: "true",
      document: {
        name: "chocolate",
        production_date: "2017-12-01",
        location: [-71.3, 41.12],
      },
    });
    console.log(response2);
    PUT /items/_doc/1?refresh
    {
      "name" : "chocolate",
      "production_date": "2018-02-01",
      "location": [-71.34, 41.12]
    }
    
    PUT /items/_doc/2?refresh
    {
      "name" : "chocolate",
      "production_date": "2018-01-01",
      "location": [-71.3, 41.15]
    }
    
    
    PUT /items/_doc/3?refresh
    {
      "name" : "chocolate",
      "production_date": "2017-12-01",
      "location": [-71.3, 41.12]
    }

Example queries

edit

Boost documents based on date

edit

The following bool search returns documents with a name value of chocolate. The search also uses the distance_feature query to increase the relevance score of documents with a production_date value closer to now.

resp = client.search(
    index="items",
    query={
        "bool": {
            "must": {
                "match": {
                    "name": "chocolate"
                }
            },
            "should": {
                "distance_feature": {
                    "field": "production_date",
                    "pivot": "7d",
                    "origin": "now"
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'items',
  body: {
    query: {
      bool: {
        must: {
          match: {
            name: 'chocolate'
          }
        },
        should: {
          distance_feature: {
            field: 'production_date',
            pivot: '7d',
            origin: 'now'
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "items",
  query: {
    bool: {
      must: {
        match: {
          name: "chocolate",
        },
      },
      should: {
        distance_feature: {
          field: "production_date",
          pivot: "7d",
          origin: "now",
        },
      },
    },
  },
});
console.log(response);
GET /items/_search
{
  "query": {
    "bool": {
      "must": {
        "match": {
          "name": "chocolate"
        }
      },
      "should": {
        "distance_feature": {
          "field": "production_date",
          "pivot": "7d",
          "origin": "now"
        }
      }
    }
  }
}

Boost documents based on location

edit

The following bool search returns documents with a name value of chocolate. The search also uses the distance_feature query to increase the relevance score of documents with a location value closer to [-71.3, 41.15].

resp = client.search(
    index="items",
    query={
        "bool": {
            "must": {
                "match": {
                    "name": "chocolate"
                }
            },
            "should": {
                "distance_feature": {
                    "field": "location",
                    "pivot": "1000m",
                    "origin": [
                        -71.3,
                        41.15
                    ]
                }
            }
        }
    },
)
print(resp)
response = client.search(
  index: 'items',
  body: {
    query: {
      bool: {
        must: {
          match: {
            name: 'chocolate'
          }
        },
        should: {
          distance_feature: {
            field: 'location',
            pivot: '1000m',
            origin: [
              -71.3,
              41.15
            ]
          }
        }
      }
    }
  }
)
puts response
const response = await client.search({
  index: "items",
  query: {
    bool: {
      must: {
        match: {
          name: "chocolate",
        },
      },
      should: {
        distance_feature: {
          field: "location",
          pivot: "1000m",
          origin: [-71.3, 41.15],
        },
      },
    },
  },
});
console.log(response);
GET /items/_search
{
  "query": {
    "bool": {
      "must": {
        "match": {
          "name": "chocolate"
        }
      },
      "should": {
        "distance_feature": {
          "field": "location",
          "pivot": "1000m",
          "origin": [-71.3, 41.15]
        }
      }
    }
  }
}

Top-level parameters for distance_feature

edit
field

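(Required, string) Name of the field used to calculate distances. This field must meet the following criteria:

  * Be a date, date_nanos or geo_point field
  * Have an index mapping parameter value of true, which is the default
  * Have a doc_values mapping parameter value of true, which is the default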

origin

(Required, string or geopoint) Date or point of origin used to calculate distances.

If the field parameter refers to a date or date_nanos field, the origin value must be a date. Date math, such as now-1h, is supported.

If the field parameter refers to a geo_point field, the origin value must be a geopoint, such as [-71.3, 41.15].

pivot

(Required, time unit or distance unit) Distance from the origin at which relevance scores receive half of the boost value.

If the field parameter refers to a date or date_nanos field, the pivot value must be a time unit, such as 1h or 10d.

If the field parameter refers to a geo_point field, the pivot value must be a distance unit, such as 1km or 12m.

boost

(Optional, float) Floating point number used to multiply the relevance score of matching documents. This value cannot be negative. Defaults to 1.0.

Notes

edit

How the distance_feature query calculates relevance scores

edit

The distance_feature query dynamically calculates the distance between the origin value and a document’s field values. It then uses this distance as a feature to boost the relevance score of closer documents.

The distance_feature query calculates a document’s relevance score as follows:

relevance score = boost * pivot / (pivot + distance)

The distance is the absolute difference between the origin value and a document’s field value.

Skip non-competitive hits

edit

Unlike the function_score query or other ways to change relevance scores, the distance_feature query efficiently skips non-competitive hits when the track_total_hits parameter is not true.