Skip to content

Aggregation

Aggregation happens in the rule head, together with the distinct keyword. The ? marks the aggregated output column.

@OrderBy(Stats, "category");
Stats(category:, total? += amount, count? += 1) distinct :- Sales(category:, amount:);

Non-aggregated head columns (category above) become the grouping key — like GROUP BY in SQL.

Aggregation operators

Operator Meaning
col? += expr Sum (use += 1 to count)
col? Min= expr Minimum
col? Max= expr Maximum
col? Avg= expr Average
col? List= expr Collect all values into an array
col? Set= expr Collect distinct values into an array
col? ArgMax= item -> score The item with the highest score
col? ArgMin= item -> score The item with the lowest score
# Sum
Revenue(total? += amount) distinct :- Orders(amount:);

# Count
OrderCount(n? += 1) distinct :- Orders(order_id:);

# Min / Max
Cheapest(min_price? Min= price) distinct :- Products(price:);
Priciest(max_price? Max= price) distinct :- Products(price:);

# Average
AvgOrder(avg? Avg= amount) distinct :- Orders(amount:);

# Collect into list / set
AllNames(names? List= name) distinct :- Users(name:);
UniqueNames(names? Set= name) distinct :- Users(name:);

# Value with max key
TopSeller(name? ArgMax= name -> revenue) distinct :- Sales(name:, revenue:);

Counting

Never use Count() — use count? += 1 instead.

More aggregating functions

In addition to the operators above: Array= x -> y (ordered array), ArgMinK(x -> y, k) and ArgMaxK(x -> y, k) (top-k), StringAgg= x (string concatenation), 1= x (any single value).

Deduplication without aggregation

distinct on its own deduplicates rows — this is how concepts extract unique entities:

@OrderBy(Customer, "customer_id");
Customer(customer_id:) distinct :- Orders(customer_id:);

Complete example

Sum, count, min/max/avg, Set= collection and ArgMax= over a small sales table:

# run: Stats, PriceStats, UniqueRegions, TopSeller
@Engine("duckdb");

# Tables
Sales(category: "books", region: "eu", seller: "ann", amount: 120);
Sales(category: "books", region: "us", seller: "bob", amount: 80);
Sales(category: "games", region: "eu", seller: "ann", amount: 300);
Sales(category: "games", region: "eu", seller: "cid", amount: 150);
Sales(category: "games", region: "us", seller: "bob", amount: 220);

# Rules

## Sum and count per category. Non-aggregated columns form the grouping key.
@OrderBy(Stats, "category");
Stats(category:, total? += amount, count? += 1) distinct :- Sales(category:, amount:);

## Min, max and average.
@OrderBy(PriceStats, "category");
PriceStats(category:, lo? Min= amount, hi? Max= amount, avg? Avg= amount) distinct :-
  Sales(category:, amount:);

## Collect distinct values into an array.
@OrderBy(UniqueRegions, "category");
UniqueRegions(category:, regions? Set= region) distinct :- Sales(category:, region:);

## The seller with the highest total revenue.
@OrderBy(SellerRevenue, "seller");
SellerRevenue(seller:, revenue? += amount) distinct :- Sales(seller:, amount:);

TopSeller(name? ArgMax= seller -> revenue) distinct :- SellerRevenue(seller:, revenue:);
Generated SQL and execution results
$ synalog.check('aggregation.l')
No errors found.

$ synalog.compile('aggregation.l', 'Stats')
-- Initializing DuckDB environment.
create schema if not exists logica_home;
-- Empty record, has to have a field by DuckDB syntax.
drop type if exists logicarecord893574736 cascade; create type logicarecord893574736 as struct(nirvana numeric);
create sequence if not exists eternal_logical_sequence;

WITH t_0_Sales AS (SELECT * FROM (

    SELECT
      E'books' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      120 AS amount
   UNION ALL

    SELECT
      E'books' AS category,
      E'us' AS region,
      E'bob' AS seller,
      80 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      300 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'cid' AS seller,
      150 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'us' AS region,
      E'bob' AS seller,
      220 AS amount

) AS UNUSED_TABLE_NAME  )
SELECT
  Sales.category AS category,
  SUM(Sales.amount) AS total,
  SUM(1) AS count
FROM
  t_0_Sales AS Sales
GROUP BY Sales.category ORDER BY category;

-- Executed on DuckDB:
| category | total | count |
|----------|-------|-------|
| books    | 200   | 2     |
| games    | 670   | 3     |
(2 rows)

$ synalog.compile('aggregation.l', 'PriceStats')
-- Initializing DuckDB environment.
create schema if not exists logica_home;
-- Empty record, has to have a field by DuckDB syntax.
drop type if exists logicarecord893574736 cascade; create type logicarecord893574736 as struct(nirvana numeric);
create sequence if not exists eternal_logical_sequence;

WITH t_0_Sales AS (SELECT * FROM (

    SELECT
      E'books' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      120 AS amount
   UNION ALL

    SELECT
      E'books' AS category,
      E'us' AS region,
      E'bob' AS seller,
      80 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      300 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'cid' AS seller,
      150 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'us' AS region,
      E'bob' AS seller,
      220 AS amount

) AS UNUSED_TABLE_NAME  )
SELECT
  Sales.category AS category,
  MIN(Sales.amount) AS lo,
  MAX(Sales.amount) AS hi,
  AVG(Sales.amount) AS avg
FROM
  t_0_Sales AS Sales
GROUP BY Sales.category ORDER BY category;

-- Executed on DuckDB:
| category | lo  | hi  | avg                |
|----------|-----|-----|--------------------|
| books    | 80  | 120 | 100.0              |
| games    | 150 | 300 | 223.33333333333334 |
(2 rows)

$ synalog.compile('aggregation.l', 'UniqueRegions')
-- Initializing DuckDB environment.
create schema if not exists logica_home;
-- Empty record, has to have a field by DuckDB syntax.
drop type if exists logicarecord893574736 cascade; create type logicarecord893574736 as struct(nirvana numeric);
create sequence if not exists eternal_logical_sequence;

WITH t_0_Sales AS (SELECT * FROM (

    SELECT
      E'books' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      120 AS amount
   UNION ALL

    SELECT
      E'books' AS category,
      E'us' AS region,
      E'bob' AS seller,
      80 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      300 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'cid' AS seller,
      150 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'us' AS region,
      E'bob' AS seller,
      220 AS amount

) AS UNUSED_TABLE_NAME  )
SELECT
  Sales.category AS category,
  ARRAY_AGG(DISTINCT Sales.region ORDER BY Sales.region) AS regions
FROM
  t_0_Sales AS Sales
GROUP BY Sales.category ORDER BY category;

-- Executed on DuckDB:
| category | regions      |
|----------|--------------|
| books    | ['eu', 'us'] |
| games    | ['eu', 'us'] |
(2 rows)

$ synalog.compile('aggregation.l', 'TopSeller')
-- Initializing DuckDB environment.
create schema if not exists logica_home;
-- Empty record, has to have a field by DuckDB syntax.
drop type if exists logicarecord893574736 cascade; create type logicarecord893574736 as struct(nirvana numeric);
create sequence if not exists eternal_logical_sequence;

WITH t_2_Sales AS (SELECT * FROM (

    SELECT
      E'books' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      120 AS amount
   UNION ALL

    SELECT
      E'books' AS category,
      E'us' AS region,
      E'bob' AS seller,
      80 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'ann' AS seller,
      300 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'eu' AS region,
      E'cid' AS seller,
      150 AS amount
   UNION ALL

    SELECT
      E'games' AS category,
      E'us' AS region,
      E'bob' AS seller,
      220 AS amount

) AS UNUSED_TABLE_NAME  ),
t_1_SellerRevenue AS (SELECT
  Sales.seller AS seller,
  SUM(Sales.amount) AS revenue
FROM
  t_2_Sales AS Sales
GROUP BY Sales.seller ORDER BY seller)
SELECT
  argmax(SellerRevenue.seller, SellerRevenue.revenue) AS name
FROM
  t_1_SellerRevenue AS SellerRevenue;

-- Executed on DuckDB:
| name |
|------|
| ann  |
(1 row)