Getting Started
Tip: All examples below are self-contained. Copy any block into a Python file or the usql shell and run it directly.
Python
from uqa.engine import Engine

# An engine with no arguments lives purely in memory;
# pass db_path=... to persist to disk instead.
engine = Engine()

# Define the schema and load a handful of rows.
engine.sql("""
CREATE TABLE books (
id SERIAL PRIMARY KEY,
title TEXT NOT NULL,
author TEXT NOT NULL,
year INTEGER,
rating REAL
)
""")
engine.sql("""INSERT INTO books (title, author, year, rating) VALUES
('The Pragmatic Programmer', 'Hunt & Thomas', 1999, 4.6),
('Clean Code', 'Robert Martin', 2008, 4.4),
('Designing Data-Intensive Applications', 'Martin Kleppmann', 2017, 4.8),
('Structure and Interpretation of Computer Programs', 'Abelson & Sussman', 1984, 4.5),
('Introduction to Algorithms', 'Cormen et al.', 2009, 4.3)
""")

# Run a filtered, ordered query and iterate the result rows.
rows = engine.sql("""
SELECT title, author, rating
FROM books
WHERE year >= 2000
ORDER BY rating DESC
""")
for book in rows:
    print(f"{book['title']}: {book['rating']}")

engine.close()
Full-Text Search
SQL
BM25 Search
-- Standard BM25 full-text search
-- NOTE(review): _score appears to be a pseudo-column populated by the
-- search predicate in WHERE -- confirm against the engine docs.
SELECT title, _score FROM papers
WHERE text_match(title, 'attention mechanism transformer')
ORDER BY _score DESC;
-- Bayesian BM25: calibrated P(relevant) in [0,1]
SELECT title, _score FROM papers
WHERE bayesian_match(title, 'deep learning optimization')
ORDER BY _score DESC;
-- Combine text search with relational filters
-- (ordinary predicates and search predicates mix freely in one WHERE)
SELECT title, year, _score FROM papers
WHERE text_match(title, 'neural network')
AND year >= 2020
AND citations > 100
ORDER BY _score DESC LIMIT 10;
Python
Fluent API Text Search
# Build the query step by step rather than as one chained expression.
query = engine.query(table="papers").term("attention", field="title")
query = query.score_bayesian_bm25("attention")
result = query.execute()

for entry in result.entries:
    print(f"doc_id={entry.doc_id}, score={entry.payload.score:.4f}")
Vector Search
SQL
KNN with HNSW Index
-- Create table with vector column
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
title TEXT,
embedding VECTOR(4)
);
-- Insert documents with embeddings
INSERT INTO documents (title, embedding) VALUES
('machine learning basics', ARRAY[0.9, 0.1, 0.0, 0.2]),
('deep neural networks', ARRAY[0.8, 0.3, 0.1, 0.1]),
('natural language processing', ARRAY[0.2, 0.8, 0.7, 0.1]),
('computer vision models', ARRAY[0.7, 0.2, 0.3, 0.5]),
('reinforcement learning', ARRAY[0.6, 0.1, 0.2, 0.8]);
-- Brute-force exact KNN (no index)
-- (the third argument of knn_match is k, the number of neighbours)
SELECT title, _score FROM documents
WHERE knn_match(embedding, ARRAY[0.85, 0.15, 0.05, 0.15], 3)
ORDER BY _score DESC;
-- Create HNSW index for faster approximate search
-- (ef_construction and m are the standard HNSW build parameters)
CREATE INDEX idx_doc_emb ON documents USING hnsw (embedding)
WITH (ef_construction = 128, m = 16);
-- Same query now uses HNSW (transparent to the user)
SELECT title, _score FROM documents
WHERE knn_match(embedding, ARRAY[0.85, 0.15, 0.05, 0.15], 3)
ORDER BY _score DESC;
Python
Fluent API Vector Search
query_vec = [0.85, 0.15, 0.05, 0.15]

# k-nearest-neighbour search: the 3 closest documents.
result = (
    engine.query(table="documents").knn(query_vec, k=3, field="embedding").execute()
)

# Threshold search: every document whose similarity is at least 0.7.
result = (
    engine.query(table="documents")
    .vector(query_vec, threshold=0.7, field="embedding")
    .execute()
)
Hybrid Fusion
Combine text, vector, and graph signals into a single relevance score.
SQL
-- Log-odds fusion: text + vector
-- (fuse_log_odds combines the wrapped signal scores into one _score)
SELECT title, _score FROM papers
WHERE fuse_log_odds(
text_match(title, 'attention transformer'),
knn_match(embedding, ARRAY[0.1, 0.2, 0.3, 0.4], 10)
)
ORDER BY _score DESC LIMIT 5;
-- Three-signal fusion: text + vector + graph
-- (ARRAY[0.1, 0.2, ...] is elided here -- supply the full query vector)
SELECT title, _score FROM papers
WHERE fuse_log_odds(
text_match(title, 'attention'),
knn_match(embedding, ARRAY[0.1, 0.2, ...], 5),
traverse_match(1, 'cited_by', 2)
) AND year >= 2020
ORDER BY _score DESC;
-- Probabilistic boolean: find papers matching text OR similar vectors
SELECT title, _score FROM papers
WHERE fuse_prob_or(
bayesian_match(title, 'neural network'),
knn_match(embedding, ARRAY[...], 10)
)
ORDER BY _score DESC;
-- Exclude certain topics with probabilistic NOT
SELECT title, _score FROM papers
WHERE fuse_prob_and(
text_match(title, 'machine learning'),
fuse_prob_not(text_match(title, 'supervised'))
)
ORDER BY _score DESC;
Python
# Fluent API: fuse the text signal and the vector signal into one score.
fused = engine.query(table="papers")
fused = fused.term("attention", field="title")
fused = fused.score_bayesian_bm25("attention")
fused = fused.knn(query_vector, k=10, field="embedding")
result = fused.fuse_log_odds().execute()

for entry in result.top_k(5):
    print(f"doc_id={entry.doc_id}, score={entry.payload.score:.4f}")
JOINs & Subqueries
SQL
-- Setup
CREATE TABLE departments (id SERIAL PRIMARY KEY, name TEXT, budget INTEGER);
CREATE TABLE employees (
id SERIAL PRIMARY KEY, name TEXT,
dept_id INTEGER REFERENCES departments(id), salary INTEGER
);
INSERT INTO departments (name, budget) VALUES
('Engineering', 500000), ('Marketing', 200000), ('Research', 300000);
INSERT INTO employees (name, dept_id, salary) VALUES
('Alice', 1, 120000), ('Bob', 1, 110000),
('Carol', 2, 95000), ('Dave', 3, 130000),
('Eve', 3, 125000);
-- JOIN with aggregation
SELECT d.name AS department,
COUNT(*) AS headcount,
AVG(e.salary) AS avg_salary
FROM employees e
INNER JOIN departments d ON e.dept_id = d.id
GROUP BY d.name
ORDER BY avg_salary DESC;
-- Employees earning above their department average
-- (the derived table dept_avg is computed once, then joined back)
SELECT e.name, e.salary, dept_avg.avg_sal
FROM employees e
JOIN (SELECT dept_id, AVG(salary) AS avg_sal
FROM employees GROUP BY dept_id) dept_avg
ON e.dept_id = dept_avg.dept_id
WHERE e.salary > dept_avg.avg_sal;
-- Top earner per department (LATERAL subquery)
-- (LATERAL lets the subquery reference d from the outer FROM list)
SELECT d.name, top.name AS top_earner, top.salary
FROM departments d,
LATERAL (SELECT name, salary FROM employees e
WHERE e.dept_id = d.id
ORDER BY salary DESC LIMIT 1) top;
-- Recursive org chart
CREATE TABLE org (id SERIAL, name TEXT, manager_id INTEGER);
INSERT INTO org (name, manager_id) VALUES
('CEO', NULL), ('VP Eng', 1), ('VP Sales', 1),
('Lead Dev', 2), ('Sr Dev', 4);
-- anchor: the row with no manager; recursive step: its direct reports
WITH RECURSIVE tree AS (
SELECT id, name, 0 AS depth FROM org WHERE manager_id IS NULL
UNION ALL
SELECT o.id, o.name, t.depth + 1
FROM org o JOIN tree t ON o.manager_id = t.id
)
SELECT REPEAT(' ', depth) || name AS org_chart FROM tree;
Analytics
SQL
Window Functions
CREATE TABLE sales (
id SERIAL, rep TEXT, region TEXT,
amount INTEGER, sale_date DATE
);
INSERT INTO sales (rep, region, amount, sale_date) VALUES
('Alice', 'East', 500, '2024-01-15'),
('Alice', 'East', 300, '2024-02-10'),
('Bob', 'West', 700, '2024-01-20'),
('Bob', 'West', 400, '2024-03-05'),
('Carol', 'East', 600, '2024-02-28');
-- Running total per rep
-- (explicit ROWS frame: everything from the partition start to this row)
SELECT rep, sale_date, amount,
SUM(amount) OVER (PARTITION BY rep ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total
FROM sales;
-- Rank within region
-- (GROUP BY runs first; RANK() then ranks the per-group SUMs)
SELECT rep, region, SUM(amount) AS total,
RANK() OVER (PARTITION BY region ORDER BY SUM(amount) DESC) AS rnk
FROM sales
GROUP BY rep, region;
Aggregates with FILTER
-- Conditional aggregation
-- (FILTER restricts which rows feed each individual aggregate)
SELECT region,
SUM(amount) AS total,
SUM(amount) FILTER (WHERE amount >= 500) AS big_deals,
COUNT(*) FILTER (WHERE amount < 500) AS small_deal_count
FROM sales GROUP BY region;
-- Monthly summary with date functions
-- (DATE_TRUNC collapses each sale_date to the first of its month)
SELECT DATE_TRUNC('month', sale_date) AS month,
COUNT(*) AS num_sales,
SUM(amount) AS revenue,
ROUND(AVG(amount), 2) AS avg_deal
FROM sales
GROUP BY DATE_TRUNC('month', sale_date)
ORDER BY month;
JSON Analytics
CREATE TABLE events (id SERIAL, data JSONB);
INSERT INTO events (data) VALUES
('{"type": "click", "page": "/home", "duration": 5}'::jsonb),
('{"type": "click", "page": "/about", "duration": 3}'::jsonb),
('{"type": "view", "page": "/home", "duration": 12}'::jsonb);
-- ->> extracts a JSON field as text, hence the CAST before averaging
SELECT data->>'type' AS event_type,
data->>'page' AS page,
AVG(CAST(data->>'duration' AS INTEGER)) AS avg_duration
FROM events
GROUP BY data->>'type', data->>'page';
Graph Queries
SQL
Social Network
-- Create named graph
SELECT * FROM create_graph('social');
-- Build the graph
-- (each cypher() call declares its result columns in the trailing AS (...) clause)
SELECT * FROM cypher('social', $$
CREATE (a:Person {name: 'Alice', age: 30, city: 'Seoul'})
CREATE (b:Person {name: 'Bob', age: 25, city: 'Tokyo'})
CREATE (c:Person {name: 'Carol', age: 35, city: 'Seoul'})
CREATE (d:Person {name: 'Dave', age: 28, city: 'London'})
CREATE (a)-[:KNOWS {since: 2020}]->(b)
CREATE (a)-[:KNOWS {since: 2019}]->(c)
CREATE (b)-[:KNOWS {since: 2021}]->(d)
CREATE (c)-[:KNOWS {since: 2022}]->(d)
RETURN a.name
$$) AS (name agtype);
-- Friends of friends
SELECT * FROM cypher('social', $$
MATCH (a:Person {name: 'Alice'})-[:KNOWS]->(friend)-[:KNOWS]->(fof)
WHERE fof.name <> 'Alice'
RETURN DISTINCT fof.name, fof.city
$$) AS (name agtype, city agtype);
-- Variable-length path: reachable within 3 hops
SELECT * FROM cypher('social', $$
MATCH (a:Person {name: 'Alice'})-[:KNOWS*1..3]->(reachable)
RETURN DISTINCT reachable.name
$$) AS (name agtype);
-- MERGE: create if not exists, update if exists
SELECT * FROM cypher('social', $$
MERGE (p:Person {name: 'Eve'})
ON CREATE SET p.age = 26
ON MATCH SET p.last_seen = 'today'
RETURN p.name, p.age
$$) AS (name agtype, age agtype);
-- Aggregation in Cypher
-- (WITH ... WHERE filters on the aggregate, like SQL's HAVING)
SELECT * FROM cypher('social', $$
MATCH (p:Person)
WITH p.city AS city, COUNT(*) AS cnt
WHERE cnt > 1
RETURN city, cnt
ORDER BY cnt DESC
$$) AS (city agtype, cnt agtype);
SQL
SQL Graph Functions
-- BFS traversal from vertex 1, up to 2 hops via 'cited_by'
SELECT _doc_id, title FROM traverse(1, 'cited_by', 2);
-- Regular path query: 2-hop cited_by chain
-- NOTE(review): the second rpq argument looks like the start vertex id -- confirm
SELECT _doc_id, title FROM rpq('cited_by/cited_by', 1);
-- Graph reachability as a scored signal (for fusion)
SELECT title, _score FROM papers
WHERE traverse_match(1, 'cited_by', 3)
ORDER BY _score DESC;
SQL
Graph Centrality & Weighted Paths
-- PageRank as FROM table source
SELECT _doc_id, title, _score FROM pagerank() ORDER BY _score DESC;
-- HITS and betweenness centrality
SELECT _doc_id, title, _score FROM hits() ORDER BY _score DESC;
SELECT _doc_id, title, _score FROM betweenness() ORDER BY _score DESC;
-- Centrality on named graph
SELECT _doc_id, _score FROM pagerank('graph:citations');
-- Centrality as WHERE signal + relational filter
SELECT title, _score FROM papers
WHERE pagerank() AND year >= 2020 ORDER BY _score DESC;
-- Bounded RPQ: 2-3 hops
SELECT _doc_id, title FROM rpq('cites{2,3}', 7);
-- Weighted path query with threshold
SELECT title, _score FROM papers
WHERE weighted_rpq('cites/cites', 7, 'weight', 'sum', 6.0);
-- Graph mutation via SQL
SELECT * FROM graph_add_vertex(1, 'person', 'employees');
SELECT * FROM graph_add_edge(1, 1, 2, 'knows', 'employees');
-- Fusion: text + centrality + gating
-- NOTE(review): the trailing 'relu' appears to select the gating
-- function -- see the fusion_gating example for the full option set.
SELECT title, _score FROM papers
WHERE fuse_log_odds(
text_match(title, 'attention'),
pagerank(), 'relu'
) ORDER BY _score DESC;
-- Progressive fusion: cascading WAND
SELECT title, _score FROM papers
WHERE progressive_fusion(
text_match(title, 'attention'),
traverse_match(1, 'cites', 2), 5,
pagerank(), 3
) ORDER BY _score DESC;
Foreign Data Wrappers & Parquet
SQL
DuckDB FDW with Hive Partitioning
-- Register DuckDB server
CREATE SERVER dw FOREIGN DATA WRAPPER duckdb_fdw;
-- Map a Parquet directory with Hive partitioning
-- (with Hive partitioning, year/month come from the directory names)
CREATE FOREIGN TABLE orders (
order_id INTEGER, customer TEXT, amount REAL,
year INTEGER, month INTEGER
) SERVER dw OPTIONS (
source '/data/orders/**/*.parquet',
hive_partitioning 'true'
);
-- Query with predicate pushdown (partition pruning)
SELECT customer, SUM(amount) AS total
FROM orders
WHERE year = 2025 AND month BETWEEN 1 AND 3
GROUP BY customer
ORDER BY total DESC LIMIT 10;
-- Join foreign table with local table
SELECT o.customer, e.name AS rep, SUM(o.amount)
FROM orders o
JOIN employees e ON o.customer = e.name
GROUP BY o.customer, e.name;
CSV Files
-- CSV file mapped through the same FDW server (dw, registered above)
CREATE FOREIGN TABLE logs (
ts TEXT, level TEXT, msg TEXT
) SERVER dw OPTIONS (source '/var/log/app.csv');
SELECT level, COUNT(*) FROM logs GROUP BY level;
Text Analysis Pipeline
Python
from uqa.analysis import (
Analyzer, StandardTokenizer, LetterTokenizer,
NGramTokenizer, KeywordTokenizer,
LowerCaseFilter, StopWordFilter, PorterStemFilter,
ASCIIFoldingFilter, SynonymFilter, EdgeNGramFilter,
HTMLStripCharFilter, MappingCharFilter,
)
# --- Standard English analysis: tokenize, lowercase, drop stop words, stem ---
english_filters = [
    LowerCaseFilter(),
    StopWordFilter(language="english"),
    PorterStemFilter(),
]
english = Analyzer(tokenizer=StandardTokenizer(), token_filters=english_filters)
print(english.analyze("The quick brown foxes are jumping"))
# ['quick', 'brown', 'fox', 'jump']

# --- Autocomplete: emit every 2..8-character prefix of each token ---
autocomplete = Analyzer(
    tokenizer=StandardTokenizer(),
    token_filters=[LowerCaseFilter(), EdgeNGramFilter(min_gram=2, max_gram=8)],
)
print(autocomplete.analyze("machine learning"))
# ['ma', 'mac', 'mach', ..., 'le', 'lea', 'lear', ...]

# --- Synonym expansion: map abbreviations onto their long forms ---
synonym_map = {"ml": ["machine learning"], "ai": ["artificial intelligence"]}
synonyms = Analyzer(
    tokenizer=StandardTokenizer(),
    token_filters=[LowerCaseFilter(), SynonymFilter(synonyms=synonym_map)],
)
print(synonyms.analyze("ml models"))
# ['ml', 'machine learning', 'models']

# --- HTML input: strip the markup before tokenizing ---
html_analyzer = Analyzer(
    char_filters=[HTMLStripCharFilter()],
    tokenizer=StandardTokenizer(),
    token_filters=[LowerCaseFilter()],
)
print(html_analyzer.analyze("<p>Hello <b>World</b></p>"))
# ['hello', 'world']
SQL
-- Create analyzer via SQL
-- (JSON spec mirrors the Python Analyzer: char_filters run before the
--  tokenizer, token_filters after it)
SELECT * FROM create_analyzer('my_english', '{
"tokenizer": {"type": "standard"},
"token_filters": [
{"type": "lowercase"},
{"type": "stop", "language": "english"},
{"type": "porter_stem"}
],
"char_filters": [{"type": "html_strip"}]
}');
-- List all analyzers
SELECT * FROM list_analyzers();
-- Drop a custom analyzer
SELECT * FROM drop_analyzer('my_english');
Arrow / Parquet Export
Python
from uqa.engine import Engine
# Populate a small demo table.
engine = Engine()
engine.sql("CREATE TABLE t (id SERIAL, name TEXT, score REAL)")
engine.sql("INSERT INTO t (name, score) VALUES ('a', 1.5), ('b', 2.3), ('c', 0.8)")

# A SQL result converts to an Arrow table (zero-copy from the engine).
result = engine.sql("SELECT * FROM t ORDER BY score DESC")
arrow_table = result.to_arrow()
print(arrow_table.schema)
print(arrow_table.to_pandas())

# ...or straight to a Parquet file on disk.
result.to_parquet("output.parquet")

# The fluent builder offers the same two sinks.
arrow_table = engine.query(table="t").execute_arrow()
engine.query(table="t").execute_parquet("fluent_output.parquet")

engine.close()
Fluent QueryBuilder API
Python
Boolean Composition
from uqa.engine import Engine
from uqa.core.types import Equals, GreaterThanOrEqual, Between

engine = Engine()
# ... (create and populate table)

# Boolean composition: two single-term queries, combined as AND / OR.
neural_q = engine.query(table="papers").term("neural", field="title")
network_q = engine.query(table="papers").term("network", field="title")
both = neural_q.and_(network_q).execute()    # intersection of the two result sets
either = neural_q.or_(network_q).execute()   # union of the two result sets

# Relational predicates attach with .filter().
result = (
    engine.query(table="papers")
    .filter("year", GreaterThanOrEqual(2020))
    .filter("citations", Between(100, 10000))
    .execute()
)

# Facets: number of documents per distinct value of a field.
facets = engine.query(table="papers").facet("category")
for value, count in facets.items():
    print(f"{value}: {count}")
EXPLAIN
# .explain() returns the query plan instead of executing the query.
explain_q = engine.query(table="papers").term("attention", field="title")
explain_q = explain_q.score_bm25("attention")
explain_q = explain_q.knn(query_vector, k=5, field="embedding")
plan = explain_q.fuse_log_odds().explain()
print(plan)
Hierarchical Data
Python
from uqa.core.types import Equals
# Insert a document containing a nested object and a list of objects.
order_doc = {
    "customer": "Alice",
    "shipping": {"city": "Seoul", "country": "KR"},
    "items": [
        {"name": "Widget", "price": 25.00, "qty": 3},
        {"name": "Gadget", "price": 49.99, "qty": 1},
    ],
}
engine.add_document(table="orders", document=order_doc)

# Dotted-path filter into the nested object.
result = (
    engine.query(table="orders")
    .path_filter("shipping.city", Equals("Seoul"))
    .execute()
)

# Dotted-path aggregate across every element of the items list.
result = (
    engine.query(table="orders")
    .path_aggregate("items.price", "sum")
    .execute()
)
SQL
-- SQL equivalents of the fluent path_filter / path_aggregate calls above
SELECT * FROM orders WHERE path_filter('shipping.city', 'Seoul');
SELECT *, path_agg('items.price', 'sum') AS total FROM orders;
SELECT path_value('shipping.country') AS country FROM orders;
Source Files
Complete runnable examples are in the repository:
Quickstart
examples/quickstart.py
Hybrid search (text + vector + fusion) in under 30 lines
SQL Examples
examples/sql/
basics, functions, graph, graph_centrality, fusion, fusion_gating, joins_and_subqueries, analytics, analysis, export, fdw
Fluent API Examples
examples/fluent/
text_search, vector_and_hybrid, graph, graph_centrality, hierarchical, multi_paradigm, analysis, export
# Quickstart
python examples/quickstart.py
# Run all examples
# (the directory listings above also mention graph_centrality and
#  fusion_gating examples, which are not repeated in this list)
python examples/sql/basics.py
python examples/sql/functions.py
python examples/sql/graph.py
python examples/sql/fusion.py
python examples/sql/joins_and_subqueries.py
python examples/sql/analytics.py
python examples/sql/analysis.py
python examples/sql/export.py
python examples/sql/fdw.py
python examples/fluent/text_search.py
python examples/fluent/vector_and_hybrid.py
python examples/fluent/graph.py
python examples/fluent/hierarchical.py
python examples/fluent/multi_paradigm.py
python examples/fluent/analysis.py
python examples/fluent/export.py