IMPORTANT: No additional bug fixes or documentation updates
will be released for this version. For the latest information, see the
current release documentation.
Test an analyzer
IMPORTANT: This documentation is no longer updated. Refer to Elastic's version policy and the latest documentation.
Test an analyzer
The analyze API is an invaluable tool for viewing the
terms produced by an analyzer. A built-in analyzer can be specified inline in
the request:
resp = client.indices.analyze(
analyzer="whitespace",
text="The quick brown fox.",
)
print(resp)
response = client.indices.analyze(
body: {
analyzer: 'whitespace',
text: 'The quick brown fox.'
}
)
puts response
const response = await client.indices.analyze({
analyzer: "whitespace",
text: "The quick brown fox.",
});
console.log(response);
POST _analyze
{
"analyzer": "whitespace",
"text": "The quick brown fox."
}
The API returns the following response:
{
"tokens": [
{
"token": "The",
"start_offset": 0,
"end_offset": 3,
"type": "word",
"position": 0
},
{
"token": "quick",
"start_offset": 4,
"end_offset": 9,
"type": "word",
"position": 1
},
{
"token": "brown",
"start_offset": 10,
"end_offset": 15,
"type": "word",
"position": 2
},
{
"token": "fox.",
"start_offset": 16,
"end_offset": 20,
"type": "word",
"position": 3
}
]
}
You can also test combinations of:
- A tokenizer
- Zero or more token filters
- Zero or more character filters
resp = client.indices.analyze(
tokenizer="standard",
filter=[
"lowercase",
"asciifolding"
],
text="Is this déjà vu?",
)
print(resp)
response = client.indices.analyze(
body: {
tokenizer: 'standard',
filter: [
'lowercase',
'asciifolding'
],
text: 'Is this déjà vu?'
}
)
puts response
const response = await client.indices.analyze({
tokenizer: "standard",
filter: ["lowercase", "asciifolding"],
text: "Is this déjà vu?",
});
console.log(response);
POST _analyze
{
"tokenizer": "standard",
"filter": [ "lowercase", "asciifolding" ],
"text": "Is this déjà vu?"
}
The API returns the following response:
{
"tokens": [
{
"token": "is",
"start_offset": 0,
"end_offset": 2,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "this",
"start_offset": 3,
"end_offset": 7,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "deja",
"start_offset": 8,
"end_offset": 12,
"type": "<ALPHANUM>",
"position": 2
},
{
"token": "vu",
"start_offset": 13,
"end_offset": 15,
"type": "<ALPHANUM>",
"position": 3
}
]
}
Alternatively, a custom analyzer can be
referred to when running the analyze API on a specific index:
resp = client.indices.create(
index="my-index-000001",
settings={
"analysis": {
"analyzer": {
"std_folded": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
mappings={
"properties": {
"my_text": {
"type": "text",
"analyzer": "std_folded"
}
}
},
)
print(resp)
resp1 = client.indices.analyze(
index="my-index-000001",
analyzer="std_folded",
text="Is this déjà vu?",
)
print(resp1)
resp2 = client.indices.analyze(
index="my-index-000001",
field="my_text",
text="Is this déjà vu?",
)
print(resp2)
response = client.indices.create(
index: 'my-index-000001',
body: {
settings: {
analysis: {
analyzer: {
std_folded: {
type: 'custom',
tokenizer: 'standard',
filter: [
'lowercase',
'asciifolding'
]
}
}
}
},
mappings: {
properties: {
my_text: {
type: 'text',
analyzer: 'std_folded'
}
}
}
}
)
puts response
response = client.indices.analyze(
index: 'my-index-000001',
body: {
analyzer: 'std_folded',
text: 'Is this déjà vu?'
}
)
puts response
response = client.indices.analyze(
index: 'my-index-000001',
body: {
field: 'my_text',
text: 'Is this déjà vu?'
}
)
puts response
const response = await client.indices.create({
index: "my-index-000001",
settings: {
analysis: {
analyzer: {
std_folded: {
type: "custom",
tokenizer: "standard",
filter: ["lowercase", "asciifolding"],
},
},
},
},
mappings: {
properties: {
my_text: {
type: "text",
analyzer: "std_folded",
},
},
},
});
console.log(response);
const response1 = await client.indices.analyze({
index: "my-index-000001",
analyzer: "std_folded",
text: "Is this déjà vu?",
});
console.log(response1);
const response2 = await client.indices.analyze({
index: "my-index-000001",
field: "my_text",
text: "Is this déjà vu?",
});
console.log(response2);
PUT my-index-000001
{
"settings": {
"analysis": {
"analyzer": {
"std_folded": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"properties": {
"my_text": {
"type": "text",
"analyzer": "std_folded"
}
}
}
}
GET my-index-000001/_analyze
{
"analyzer": "std_folded",
"text": "Is this déjà vu?"
}
GET my-index-000001/_analyze
{
"field": "my_text",
"text": "Is this déjà vu?"
}
Both analyze requests return the following response:
{
"tokens": [
{
"token": "is",
"start_offset": 0,
"end_offset": 2,
"type": "<ALPHANUM>",
"position": 0
},
{
"token": "this",
"start_offset": 3,
"end_offset": 7,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "deja",
"start_offset": 8,
"end_offset": 12,
"type": "<ALPHANUM>",
"position": 2
},
{
"token": "vu",
"start_offset": 13,
"end_offset": 15,
"type": "<ALPHANUM>",
"position": 3
}
]
}