Elasticsearch Query Language (ES|QL)
Elasticsearch and Kibana added a brand new query language: ES|QL — coming with a new endpoint (_query
) and a simplified syntax. It lets you refine your results one step at a time and adds new features like data enrichment and processing right in your query. And you can use it across the Elastic Stack — from the Elasticsearch API to Discover and Alerting in Kibana. But the biggest change is behind the scenes: Using a new compute engine that was built with performance in mind.
Join us for an overview and a look at syntax and internals.
Kibana Console sample queries you can run on https://esql.demo.elastic.co.
/**
* See the documentation for a complete list of
* features; we'll focus on the concepts and some
* interesting examples.
* Some queries will (intentionally) result in errors.
*/
// Let's get started with the equivalent of GET /
// Cover output formats, case sensitivity, KEEP, LIMIT
POST _query
{
"query": """
SHOW INFO //Comment
"""
}
POST _query?format=json //But no comment here
{
"query": """
SHOW INFO
"""
}
POST _query?format=csv
{
"query": """
SHOW INFO
"""
}
POST _query?format=txt
{
"query": """
SHOW INFO
"""
}
POST _query?format=txt
{
"query": """
show info //Are keywords case sensitive?
"""
}
POST _query?format=txt
{
"query": """
SHOW INFO
| LIMIT 1 //Removing the warning
"""
}
POST _query?format=txt
{
"query": """
SHOW INFO
| LIMIT 1
| KEEP version //Reducing the output
"""
}
POST _query?format=txt
{
"query": """
/* Combining it all with a
* multi-line comment */
SHOW INFO
| LIMIT 1
| KEEP version
"""
}
// Where should you put the pipe?
POST _query?format=txt
{
"query": """
SHOW INFO | LIMIT 1 | KEEP version
"""
}
POST _query?format=txt
{
"query": """
SHOW INFO |
LIMIT 1 |
KEEP version
"""
}
POST _query?format=txt
{
"query": """
SHOW INFO
| LIMIT 1
| KEEP version
"""
}
// Use some actual data
POST _query?format=txt
{
"query": """
FROM sample_data
| LIMIT 5
"""
}
GET _query?format=txt
{
"query": """ //Can you use GET as well?
FROM sample_data
| LIMIT 5
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data METADATA _index, _id, _version
| LIMIT 5
"""
}
POST _query
{
"query": """
FROM sample_data
| LIMIT 5
""",
"profile": true //This is undocumented for now but might come in handy; only works with JSON
}
POST _query?format=txt
{
"query": """
FROM sample_data
| KEEP @timestamp, message
| LIMIT 10
"""
}
// A quick look at data types
POST _query?format=txt
{
"query": """
FROM sample_data
| WHERE client_ip = "172.21.2.162"
| LIMIT 10
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| WHERE client_ip == "172.21.2.162"
| LIMIT 10
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| WHERE client_ip == TO_IP("172.21.2.162")
| LIMIT 10
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| WHERE client_ip IS NOT NULL
| LIMIT 10
"""
}
// And finally aggregations (with a query DSL example for comparison)
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS COUNT_DISTINCT(client_ip), AVG(event_duration)
| LIMIT 1
"""
}
GET sample_data/_search
{
"size": 0,
"aggs": {
"distinct_client-ip": {
"cardinality": {
"field": "client_ip"
}
},
"average_event-duration": {
"avg": {
"field": "event_duration"
}
}
}
}
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS AVG(event_duration), COUNT(*) BY client_ip
| LIMIT 10
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS AVG(event_duration), COUNT(*) BY client_ip
| SORT COUNT(*)
| LIMIT 10
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS AVG(event_duration), mycount = COUNT(*) BY client_ip
| SORT mycount
| LIMIT 10
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS AVG(event_duration), COUNT(*) BY client_ip
| SORT `count(*)` //Case sensitivity revisited — will this work?
| LIMIT 10
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS AVG(event_duration), COUNT(*) BY client_ip
| SORT `COUNT(*)` DESC
| LIMIT 10
"""
}
// Precedural queries as a core concept
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS AVG(event_duration), COUNT(*) BY client_ip
| LIMIT 5
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| LIMIT 5
| STATS AVG(event_duration), COUNT(*) BY client_ip
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| LIMIT 5
| STATS AVG(event_duration), COUNT(*) BY client_ip
| SORT `COUNT(*)` DESC
| LIMIT 1
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| STATS average_duration = AVG(event_duration), count = COUNT(*) BY client_ip
| STATS MAX(average_duration) BY count //A chained aggregation
| LIMIT 5
"""
}
// More language features, using ROW for sample data
POST _query?format=txt
{
"query": """
ROW a = "philipp@elastic.co"
| WHERE a RLIKE ".*@elastic.co"
| KEEP a
"""
}
POST _query?format=txt
{
"query": """
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
| DISSECT a "%{date} - %{msg} - %{ip}"
| KEEP date, msg, ip
| LIMIT 1
"""
}
POST _query?format=txt
{
"query": """
ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}"
// Note the impact of switching KEEP and EVAL around
| KEEP date, ip, email, num
| EVAL date = DATE_EXTRACT("year", TO_DATETIME(date))
| LIMIT 1
"""
}
POST _query?format=txt
{
"query": """
ROW first_name = "Philipp", last_name = "Krenn", height = 1.81
| EVAL height_feet = height * 3.281, height_cm = height * 100
"""
}
POST _query?format=txt
{
"query": """
ROW height = 1.81, weight = 75
| EVAL BMI = ROUND(weight / POW(height, 2), 1)
| EVAL healthy = CASE(BMI < 18.5, false, BMI >= 25, false, true)
"""
}
// Multivalued (arrays) fields can be read but functions on them
// return null (unless otherwise noted in the docs)
POST _query?format=txt
{
"query": """
ROW a = 5, b = [ 1, 2 ]
| EVAL a - 3, b + 2, a + b
| LIMIT 1
"""
}
// Enrich your data with lookups
GET _enrich/policy/clientip_policy
POST _query?format=txt
{
"query": """
FROM clientips
| LIMIT 5
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| KEEP @timestamp, client_ip, event_duration
| EVAL client_ip = TO_STRING(client_ip)
| ENRICH clientip_policy ON client_ip WITH env
"""
}
POST _query?format=txt
{
"query": """
FROM sample_data
| KEEP @timestamp, client_ip, event_duration
| EVAL client_ip = TO_STRING(client_ip)
| ENRICH clientip_policy ON client_ip WITH env
| STATS median_duration = MEDIAN(event_duration) BY env
"""
}