UQA Documentation

Quick reference for SQL syntax, Python API, and cross-paradigm features.

Quick Start

Installation

# From PyPI
pip install uqa

# From source
pip install -e .

# With dev dependencies
pip install -e ".[dev]"

Interactive Shell

usql                 # In-memory database
usql --db mydata.db  # Persistent SQLite-backed database

Python

from uqa.engine import Engine

engine = Engine(db_path="research.db")

engine.sql("""
    CREATE TABLE papers (
        id SERIAL PRIMARY KEY,
        title TEXT NOT NULL,
        year INTEGER NOT NULL
    )
""")

engine.sql("""INSERT INTO papers (title, year) VALUES
    ('attention is all you need', 2017),
    ('bert pre-training', 2019)
""")

result = engine.sql("SELECT * FROM papers ORDER BY year")
print(result)

engine.close()

Data Definition Language

CREATE TABLE

CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT UNIQUE,
    dept_id INTEGER REFERENCES departments(id),
    salary NUMERIC(10,2) DEFAULT 0,
    active BOOLEAN DEFAULT TRUE,
    hired_at TIMESTAMP DEFAULT NOW(),
    CHECK (salary >= 0)
);

CREATE TABLE IF NOT EXISTS logs (id SERIAL, msg TEXT);
CREATE TEMPORARY TABLE scratch (x INTEGER);
CREATE TABLE summary AS SELECT dept_id, COUNT(*) FROM employees GROUP BY dept_id;

ALTER TABLE

ALTER TABLE employees ADD COLUMN bio TEXT;
ALTER TABLE employees DROP COLUMN bio;
ALTER TABLE employees RENAME COLUMN name TO full_name;
ALTER TABLE employees RENAME TO staff;
ALTER TABLE employees ALTER COLUMN salary SET DEFAULT 50000;
ALTER TABLE employees ALTER COLUMN salary SET NOT NULL;

Indexes

-- B-tree index
CREATE INDEX idx_dept ON employees (dept_id);

-- HNSW vector index (explicit creation required)
CREATE INDEX idx_emb ON papers USING hnsw (embedding)
    WITH (ef_construction = 200, m = 16);

DROP INDEX idx_dept;
DROP INDEX IF EXISTS idx_emb;

Sequences

CREATE SEQUENCE order_seq;
SELECT NEXTVAL('order_seq');
SELECT CURRVAL('order_seq');
SELECT SETVAL('order_seq', 100);
ALTER SEQUENCE order_seq RESTART WITH 1;

Views

CREATE VIEW active_staff AS
    SELECT * FROM employees WHERE active = TRUE;
DROP VIEW IF EXISTS active_staff;

Data Manipulation Language

INSERT

INSERT INTO employees (name, salary) VALUES ('Alice', 90000);

-- Multiple rows
INSERT INTO employees (name, salary) VALUES
    ('Bob', 85000), ('Carol', 92000);

-- Insert from query
INSERT INTO archive SELECT * FROM employees WHERE active = FALSE;

-- UPSERT
INSERT INTO employees (id, name, salary) VALUES (1, 'Alice', 95000)
    ON CONFLICT (id) DO UPDATE SET salary = EXCLUDED.salary;

-- RETURNING
INSERT INTO employees (name) VALUES ('Dave') RETURNING id, name;

UPDATE

UPDATE employees SET salary = salary * 1.1 WHERE dept_id = 3;

-- UPDATE with FROM (join)
UPDATE employees e SET salary = salary + d.bonus
    FROM departments d WHERE e.dept_id = d.id;

UPDATE employees SET active = FALSE WHERE id = 5 RETURNING *;

DELETE

DELETE FROM employees WHERE active = FALSE;

-- DELETE with USING (join)
DELETE FROM employees e USING departments d
    WHERE e.dept_id = d.id AND d.name = 'Closed';

DELETE FROM employees WHERE id = 10 RETURNING id, name;

Queries

SELECT DISTINCT dept_id, COUNT(*) AS cnt,
       AVG(salary) AS avg_sal
FROM employees
WHERE active = TRUE AND salary BETWEEN 50000 AND 150000
GROUP BY dept_id
HAVING COUNT(*) > 3
ORDER BY avg_sal DESC NULLS LAST
LIMIT 10 OFFSET 0;

JOINs

All standard JOIN types are supported. The DPccp optimizer automatically selects the optimal join order for 2+ tables.

-- INNER JOIN
SELECT e.name, d.name AS dept
FROM employees e INNER JOIN departments d ON e.dept_id = d.id;

-- LEFT / RIGHT / FULL OUTER JOIN
SELECT e.name, d.name
FROM employees e LEFT JOIN departments d ON e.dept_id = d.id;

-- CROSS JOIN
SELECT * FROM colors CROSS JOIN sizes;

-- Non-equality condition
SELECT * FROM events a JOIN events b
    ON a.end_time >= b.start_time AND a.end_time <= b.end_time;

-- LATERAL subquery
SELECT d.name, top.name, top.salary
FROM departments d,
     LATERAL (SELECT * FROM employees e
              WHERE e.dept_id = d.id
              ORDER BY salary DESC LIMIT 3) top;

Subqueries

-- IN subquery
SELECT * FROM employees
WHERE dept_id IN (SELECT id FROM departments WHERE budget > 100000);

-- EXISTS
SELECT * FROM departments d
WHERE EXISTS (SELECT 1 FROM employees e WHERE e.dept_id = d.id);

-- Scalar subquery
SELECT name, salary,
       salary - (SELECT AVG(salary) FROM employees) AS diff
FROM employees;

-- Derived table
SELECT * FROM (SELECT dept_id, AVG(salary) AS avg_sal
               FROM employees GROUP BY dept_id) sub
WHERE sub.avg_sal > 80000;

Common Table Expressions

-- Simple CTE
WITH dept_stats AS (
    SELECT dept_id, AVG(salary) AS avg_sal
    FROM employees GROUP BY dept_id
)
SELECT e.name, d.avg_sal
FROM employees e JOIN dept_stats d ON e.dept_id = d.dept_id;

-- Recursive CTE
WITH RECURSIVE tree AS (
    SELECT id, name, 1 AS depth
    FROM org WHERE parent_id IS NULL
    UNION ALL
    SELECT o.id, o.name, t.depth + 1
    FROM org o JOIN tree t ON o.parent_id = t.id
)
SELECT * FROM tree ORDER BY depth;

Window Functions

-- Ranking
SELECT name, salary,
    ROW_NUMBER() OVER (ORDER BY salary DESC) AS rn,
    RANK()       OVER (ORDER BY salary DESC) AS rnk,
    DENSE_RANK() OVER (ORDER BY salary DESC) AS drnk
FROM employees;

-- Running total with frame
SELECT sale_date, amount,
    SUM(amount) OVER (ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running
FROM sales;

-- LAG / LEAD
SELECT name, salary,
    LAG(salary, 1) OVER (ORDER BY salary) AS prev,
    LEAD(salary, 1) OVER (ORDER BY salary) AS next
FROM employees;

-- Named window
SELECT name,
    SUM(salary) OVER w,
    AVG(salary) OVER w
FROM employees
WINDOW w AS (PARTITION BY dept_id ORDER BY hired_at);

Aggregates

-- Basic
SELECT dept_id,
    COUNT(*), SUM(salary), AVG(salary),
    MIN(salary), MAX(salary),
    STDDEV(salary), VARIANCE(salary)
FROM employees GROUP BY dept_id;

-- FILTER clause
SELECT
    COUNT(*) FILTER (WHERE status = 'active') AS active,
    COUNT(*) FILTER (WHERE status = 'inactive') AS inactive
FROM users;

-- STRING_AGG and ARRAY_AGG
SELECT dept_id,
    STRING_AGG(name, ', ' ORDER BY name) AS names,
    ARRAY_AGG(salary) AS salaries
FROM employees GROUP BY dept_id;

-- Statistical: correlation, regression
SELECT CORR(revenue, ad_spend) AS r,
       REGR_SLOPE(revenue, ad_spend) AS slope,
       REGR_R2(revenue, ad_spend) AS r_squared
FROM campaigns;

-- Ordered set aggregates
SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) AS median,
       MODE() WITHIN GROUP (ORDER BY dept_id) AS most_common_dept
FROM employees;

Set Operations

SELECT name FROM staff_2024
UNION ALL
SELECT name FROM staff_2025
ORDER BY name;

SELECT id FROM table_a
INTERSECT
SELECT id FROM table_b;

SELECT id FROM all_users
EXCEPT
SELECT id FROM blocked_users;

Data Types

| Type | Description |
|------|-------------|
| INTEGER, BIGINT | Signed integers (64-bit) |
| SERIAL, BIGSERIAL | Auto-incrementing integer (backed by sequence) |
| REAL, FLOAT, DOUBLE PRECISION | IEEE 754 floating point |
| NUMERIC(p,s) | Exact decimal with precision and scale |
| TEXT, VARCHAR(n) | Variable-length text |
| BOOLEAN | TRUE / FALSE |
| DATE | Calendar date (YYYY-MM-DD) |
| TIMESTAMP, TIMESTAMPTZ | Date and time (with optional timezone) |
| INTERVAL | Time duration |
| JSON, JSONB | JSON data (access via ->, ->>) |
| UUID | Universally unique identifier |
| BYTEA | Binary data |
| INTEGER[] | Array type |
| VECTOR(N) | N-dimensional vector (stored as JSON array) |
Note: VECTOR(N) is a storage type, not an index directive. Vectors are stored in the document store. Create an HNSW index explicitly with CREATE INDEX ... USING hnsw for approximate nearest neighbor search, or use brute-force exact search without an index.

Scalar Functions

String

UPPER, LOWER, LENGTH, SUBSTRING, TRIM/LTRIM/RTRIM, CONCAT, CONCAT_WS, REPLACE, POSITION, LPAD/RPAD, REPEAT, REVERSE, SPLIT_PART, LEFT/RIGHT, INITCAP, TRANSLATE, OVERLAY, FORMAT, MD5, CHR/ASCII, STARTS_WITH, ENCODE/DECODE

Regex

REGEXP_MATCH, REGEXP_REPLACE, REGEXP_SPLIT_TO_ARRAY, REGEXP_SPLIT_TO_TABLE

Math

ABS, ROUND, CEIL/FLOOR, TRUNC, POWER/SQRT/CBRT, LOG/LN/EXP, MOD, SIGN, PI, RANDOM, DEGREES/RADIANS, GCD/LCM, FACTORIAL, WIDTH_BUCKET

Trigonometric: SIN, COS, TAN, ASIN, ACOS, ATAN, ATAN2

Date/Time

NOW(), CURRENT_DATE, CURRENT_TIMESTAMP, CLOCK_TIMESTAMP(), EXTRACT(field FROM col), DATE_TRUNC('unit', col), DATE_PART('field', col), AGE(), TO_CHAR/TO_DATE/TO_TIMESTAMP, MAKE_DATE/MAKE_TIMESTAMP/MAKE_INTERVAL, ISFINITE, OVERLAPS

Conditional

COALESCE, NULLIF, GREATEST, LEAST, CASE ... WHEN ... THEN ... ELSE ... END

Table Functions (FROM clause)

SELECT * FROM GENERATE_SERIES(1, 10);
SELECT * FROM GENERATE_SERIES('2024-01-01'::timestamp, '2024-12-01'::timestamp, '1 month');
SELECT * FROM UNNEST(ARRAY[1, 2, 3]);
SELECT * FROM REGEXP_SPLIT_TO_TABLE('a,b,c', ',');

JSON/JSONB

Operators

| Operator | Description | Example |
|----------|-------------|---------|
| -> | Get field as JSON | data->'name' |
| ->> | Get field as text | data->>'name' |
| #> | Get nested path as JSON | data#>'{a,b}' |
| #>> | Get nested path as text | data#>>'{a,b}' |
| @> | Contains | data @> '{"key":"val"}' |
| <@ | Contained by | '{"a":1}' <@ data |
| ? | Key exists | data ? 'key' |

Functions

SELECT JSON_BUILD_OBJECT('name', 'Alice', 'age', 30);
SELECT JSON_BUILD_ARRAY(1, 2, 'three');
SELECT JSON_TYPEOF('[1,2]'::jsonb);     -- 'array'
SELECT JSON_ARRAY_LENGTH('[1,2,3]');    -- 3
SELECT * FROM JSON_EACH('{"a":1,"b":2}');
SELECT * FROM JSON_ARRAY_ELEMENTS('[1,2,3]');
SELECT JSONB_SET('{"a":1}'::jsonb, '{b}', '2');

Vector Search

UQA follows the pgvector model: VECTOR(N) is a storage type. Vectors are stored in the document store as JSON arrays. HNSW indexes are created explicitly.

-- Create table with vector column
CREATE TABLE papers (
    id SERIAL PRIMARY KEY,
    title TEXT,
    embedding VECTOR(384)
);

-- Insert vectors
INSERT INTO papers (title, embedding) VALUES
    ('attention is all you need', ARRAY[0.1, 0.2, ...]);

-- Brute-force KNN (no index needed, exact results)
SELECT title, _score FROM papers
WHERE knn_match(embedding, ARRAY[0.1, 0.2, ...], 5)
ORDER BY _score DESC;

-- Create HNSW index for approximate (faster) search
CREATE INDEX idx_emb ON papers USING hnsw (embedding)
    WITH (ef_construction = 200, m = 16);

-- Now knn_match uses HNSW (same query syntax)
SELECT title, _score FROM papers
WHERE knn_match(embedding, ARRAY[0.1, 0.2, ...], 10)
ORDER BY _score DESC;

-- Drop and recreate index (vectors remain in document store)
DROP INDEX idx_emb;
CREATE INDEX idx_emb ON papers USING hnsw (embedding);

Graph Queries

Apache AGE compatible openCypher integration for property graph queries on named graphs.

-- Create a named graph
SELECT * FROM create_graph('social');

-- Create vertices and edges
SELECT * FROM cypher('social', $$
    CREATE (a:Person {name: 'Alice', age: 30})
           -[:KNOWS]->
           (b:Person {name: 'Bob', age: 25})
    RETURN a.name, b.name
$$) AS (a agtype, b agtype);

-- Pattern matching with WHERE
SELECT * FROM cypher('social', $$
    MATCH (p:Person)-[:KNOWS]->(friend:Person)
    WHERE p.age > 25
    RETURN p.name, friend.name
    ORDER BY p.name
$$) AS (person agtype, friend agtype);

-- Variable-length paths
SELECT * FROM cypher('social', $$
    MATCH (a:Person {name: 'Alice'})-[:KNOWS*1..3]->(remote)
    RETURN remote.name
$$) AS (name agtype);

-- Graph traversal (SQL functions)
SELECT * FROM traverse(1, 'cited_by', 2);
SELECT * FROM rpq('cited_by/cited_by', 1);

-- Bounded RPQ: paths of 2-3 hops
SELECT * FROM rpq('knows{2,3}', 1);

Centrality

-- Graph centrality (FROM clause)
SELECT _doc_id, title, _score FROM pagerank() ORDER BY _score DESC;
SELECT _doc_id, title, _score FROM hits() ORDER BY _score DESC;
SELECT _doc_id, title, _score FROM betweenness() ORDER BY _score DESC;

-- Centrality on named graph
SELECT _doc_id, _score FROM pagerank('graph:social') ORDER BY _score DESC;

-- Centrality as WHERE signal
SELECT title, _score FROM papers WHERE pagerank() AND year >= 2020;

-- Weighted path query
SELECT title, _score FROM papers
WHERE weighted_rpq('cites/cites', 7, 'weight', 'sum', 6.0);

-- Graph mutation via SQL
SELECT * FROM graph_add_vertex(1, 'person', 'employees');
SELECT * FROM graph_add_edge(1, 1, 2, 'knows', 'employees', 'weight=0.8');

Multi-Signal Fusion

Combine relevance signals from different paradigms (text, vector, graph) into a single ranked result.

-- Log-odds fusion: text + vector + graph
SELECT title, _score FROM papers
WHERE fuse_log_odds(
    text_match(title, 'attention'),
    knn_match(embedding, ARRAY[0.1, 0.2, ...], 5),
    traverse_match(1, 'cited_by', 2)
)
ORDER BY _score DESC;

-- Probabilistic AND / OR / NOT
SELECT title, _score FROM papers
WHERE fuse_prob_and(
    bayesian_match(title, 'neural'),
    knn_match(embedding, ARRAY[...], 10)
)
ORDER BY _score DESC;

Gating

-- Log-odds with ReLU gating
SELECT title, _score FROM papers
WHERE fuse_log_odds(
    text_match(title, 'attention'),
    pagerank(),
    'relu'
) ORDER BY _score DESC;

-- Progressive fusion: cascading WAND
SELECT title, _score FROM papers
WHERE progressive_fusion(
    text_match(title, 'attention'),
    traverse_match(1, 'cites', 2), 5,
    pagerank(), 3
) ORDER BY _score DESC;

EXPLAIN SELECT title, _score FROM papers
WHERE fuse_log_odds(
    text_match(title, 'attention'),
    knn_match(embedding, ARRAY[...], 5)
);

Geospatial

R*Tree spatial index with Haversine great-circle distance for POINT columns.

-- Create table with POINT column
CREATE TABLE restaurants (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    cuisine TEXT NOT NULL,
    location POINT
);

-- Create R*Tree spatial index
CREATE INDEX idx_loc ON restaurants USING rtree (location);

-- Range query with Haversine distance
SELECT name,
       ROUND(ST_Distance(location, POINT(-73.9857, 40.7484)), 0) AS dist_m
FROM restaurants
WHERE spatial_within(location, POINT(-73.9857, 40.7484), 5000)
ORDER BY dist_m;

-- Spatial + text + vector fusion
SELECT name, _score FROM restaurants
WHERE fuse_log_odds(
    text_match(description, 'pizza'),
    spatial_within(location, POINT(-73.9969, 40.7306), 3000),
    knn_match(embedding, $1, 5)
) ORDER BY _score DESC;

Spatial Functions

| Function | Description |
|----------|-------------|
| ST_Distance(p1, p2) | Haversine great-circle distance in meters |
| ST_Within(p1, p2, dist) | Distance predicate (boolean) |
| ST_DWithin(p1, p2, dist) | Alias for ST_Within |
| POINT(x, y) | Construct a POINT value (longitude, latitude) |
| spatial_within(field, point, dist) | WHERE-clause geospatial range query (R*Tree + Haversine) |

Temporal Graph

Time-aware graph operations, GNN-style message passing, and structural embeddings.

-- Temporal traversal: edges valid at timestamp
SELECT * FROM temporal_traverse(1, 'knows', 2, 1700000000);

-- GNN message passing: aggregate neighbor properties
SELECT * FROM papers
WHERE message_passing(2, 'mean', 'citations');

-- Structural graph embeddings
SELECT * FROM papers
WHERE graph_embedding(16, 3);

Temporal / GNN Functions

| Function | Description |
|----------|-------------|
| temporal_traverse(start, lbl, hops, ts) | Time-aware graph traversal |
| message_passing(k, agg, property) | GNN k-layer neighbor aggregation |
| graph_embedding(dims, k) | Structural graph embeddings |

Foreign Data Wrappers

Query external data sources (Parquet, CSV, JSON, remote servers) as if they were local tables.

-- Register a DuckDB-backed server
CREATE SERVER warehouse FOREIGN DATA WRAPPER duckdb_fdw;

-- Map a Parquet directory with Hive partitioning
CREATE FOREIGN TABLE sales (
    id INTEGER, name TEXT, amount INTEGER,
    year INTEGER, month INTEGER
) SERVER warehouse OPTIONS (
    source '/data/sales/**/*.parquet',
    hive_partitioning 'true'
);

-- Predicates pushed down to DuckDB for partition pruning
SELECT name, SUM(amount) FROM sales
WHERE year IN (2024, 2025) AND month > 6
GROUP BY name;

-- Arrow Flight SQL for remote servers
CREATE SERVER remote FOREIGN DATA WRAPPER arrow_fdw
    OPTIONS (host 'flights.example.com', port '8815');

-- Full SQL works on foreign tables
SELECT s.name, e.dept
FROM sales s JOIN employees e ON s.rep_id = e.id
ORDER BY s.amount DESC LIMIT 10;

Full Query Pushdown (v0.19.0)

When all tables in a query are foreign tables on the same DuckDB server, the entire query is pushed to DuckDB — no Python row materialization. JOINs, aggregates, window functions, subqueries, and CTEs all execute natively.

-- Entire query delegated to DuckDB (41M rows, instant)
SELECT payment_type, COUNT(*) AS trips,
       AVG(total_amount) AS avg_total
FROM taxi_trips
GROUP BY payment_type
ORDER BY trips DESC;

-- Window functions also pushed down
SELECT name, value,
       ROW_NUMBER() OVER (ORDER BY value DESC) AS rn
FROM foreign_data;

Mixed Foreign-Local Queries

Local dimension tables (<100K rows) are shipped to DuckDB as temp tables, letting the full JOIN execute in DuckDB. If DuckDB cannot handle UQA-specific functions (e.g., spatial_within), the standard UQA operator pipeline takes over automatically.

-- Local taxi_zones (263 rows) shipped to DuckDB,
-- JOINed with foreign taxi_trips (41M rows) in DuckDB
SELECT z.zone, z.borough, COUNT(*) AS pickups
FROM taxi_trips t
JOIN taxi_zones z ON t.pu_location_id = z.location_id
GROUP BY z.zone, z.borough
ORDER BY pickups DESC LIMIT 10;

Python API

Engine

from uqa.engine import Engine

# In-memory
engine = Engine()

# Persistent (SQLite-backed)
engine = Engine(db_path="research.db")

# With tuning parameters
engine = Engine(
    db_path="research.db",
    parallel_workers=4,    # ThreadPool concurrency
    spill_threshold=100000 # Disk spill after 100K rows
)

# SQL interface
result = engine.sql("SELECT * FROM papers")
print(result.columns)  # ['id', 'title', 'year']
print(result.rows)     # [{'id': 1, 'title': '...', ...}]

# Arrow / Parquet export
arrow_table = result.to_arrow()
result.to_parquet("papers.parquet")

# Retrieve a document by ID
doc = engine.get_document(1, table="papers")

# Access a table's graph store
gs = engine.get_graph_store("papers")

# Get/set field analyzer
engine.set_table_analyzer("papers", "title", "english_stem")
analyzer = engine.get_table_analyzer("papers", "title")

# Context manager
with Engine(db_path="data.db") as engine:
    engine.sql("SELECT 1")

QueryBuilder (Fluent API)

from uqa.core.types import Equals, GreaterThanOrEqual

# Text search with BM25 scoring
result = (
    engine.query(table="papers")
    .term("attention", field="title")
    .score_bm25("attention")
    .execute()
)

# Boolean composition
q1 = engine.query(table="papers").term("neural", field="title")
q2 = engine.query(table="papers").term("network", field="title")
combined = q1.and_(q2)

# Filter + aggregate
result = (
    engine.query(table="employees")
    .filter("dept", Equals("Engineering"))
    .aggregate("salary", "avg")
)

# Graph traversal
team = engine.query(table="org").traverse(ceo_id, "manages", max_hops=3)
total_salary = team.vertex_aggregate("salary", "sum")

# Multi-signal fusion
result = (
    engine.query(table="papers")
    .term("attention", field="title")
    .score_bayesian_bm25("attention")
    .knn(query_vector, k=10, field="embedding")
    .fuse_log_odds()
    .execute()
)

# Arrow export
arrow_table = engine.query(table="papers").term("ai", field="title").execute_arrow()
engine.query(table="papers").execute_parquet("results.parquet")

Text Analysis

from uqa.analysis import (
    Analyzer, StandardTokenizer, LowerCaseFilter,
    StopWordFilter, PorterStemFilter
)

# Build a custom analyzer
analyzer = Analyzer(
    tokenizer=StandardTokenizer(),
    token_filters=[
        LowerCaseFilter(),
        StopWordFilter(language="english"),
        PorterStemFilter(),
    ],
)

tokens = analyzer.analyze("The quick brown foxes jumped over lazy dogs")
# ['quick', 'brown', 'fox', 'jump', 'lazi', 'dog']

# Register for reuse
from uqa.analysis import register_analyzer
register_analyzer("english_stem", analyzer)

# Or via SQL
engine.sql("""SELECT * FROM create_analyzer('english_stem', '{
    "tokenizer": {"type": "standard"},
    "token_filters": [{"type": "lowercase"}, {"type": "stop"}, {"type": "porter_stem"}]
}')""")

Transactions

BEGIN;
INSERT INTO accounts (name, balance) VALUES ('Alice', 1000);
SAVEPOINT sp1;
UPDATE accounts SET balance = balance - 100 WHERE name = 'Alice';
ROLLBACK TO SAVEPOINT sp1;  -- undo the UPDATE
COMMIT;                     -- INSERT is committed

Python API

with engine.begin() as tx:
    engine.sql("INSERT INTO logs (msg) VALUES ('hello')")
    tx.savepoint("sp1")
    engine.sql("INSERT INTO logs (msg) VALUES ('world')")
    tx.rollback_to("sp1")
    # only 'hello' is committed

EXPLAIN / ANALYZE

-- Show query plan
EXPLAIN SELECT * FROM employees WHERE salary > 80000;

-- Show plan with execution stats
EXPLAIN ANALYZE SELECT e.name, d.name
FROM employees e JOIN departments d ON e.dept_id = d.id;

-- Collect column statistics for the optimizer
ANALYZE employees;