Quick Start
Installation
# From PyPI
pip install uqa
# From source
pip install -e .
# With dev dependencies
pip install -e ".[dev]"
Interactive Shell
usql # In-memory database
usql --db mydata.db # Persistent SQLite-backed database
Python
from uqa.engine import Engine
engine = Engine(db_path="research.db")
engine.sql("""
CREATE TABLE papers (
id SERIAL PRIMARY KEY,
title TEXT NOT NULL,
year INTEGER NOT NULL
)
""")
engine.sql("""INSERT INTO papers (title, year) VALUES
('attention is all you need', 2017),
('bert pre-training', 2019)
""")
result = engine.sql("SELECT * FROM papers ORDER BY year")
print(result)
engine.close()
Data Definition Language
CREATE TABLE
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
email TEXT UNIQUE,
dept_id INTEGER REFERENCES departments(id),
salary NUMERIC(10,2) DEFAULT 0,
active BOOLEAN DEFAULT TRUE,
hired_at TIMESTAMP DEFAULT NOW(),
CHECK (salary >= 0)
);
CREATE TABLE IF NOT EXISTS logs (id SERIAL, msg TEXT);
CREATE TEMPORARY TABLE scratch (x INTEGER);
CREATE TABLE summary AS SELECT dept_id, COUNT(*) FROM employees GROUP BY dept_id;
ALTER TABLE
ALTER TABLE employees ADD COLUMN bio TEXT;
ALTER TABLE employees DROP COLUMN bio;
ALTER TABLE employees RENAME COLUMN name TO full_name;
ALTER TABLE employees ALTER COLUMN salary SET DEFAULT 50000;
ALTER TABLE employees ALTER COLUMN salary SET NOT NULL;
ALTER TABLE employees RENAME TO staff;
Indexes
-- B-tree index
CREATE INDEX idx_dept ON employees (dept_id);
-- HNSW vector index (explicit creation required)
CREATE INDEX idx_emb ON papers USING hnsw (embedding)
WITH (ef_construction = 200, m = 16);
DROP INDEX idx_dept;
DROP INDEX IF EXISTS idx_emb;
Sequences
CREATE SEQUENCE order_seq;
SELECT NEXTVAL('order_seq');
SELECT CURRVAL('order_seq');
SELECT SETVAL('order_seq', 100);
ALTER SEQUENCE order_seq RESTART WITH 1;
Views
CREATE VIEW active_staff AS
SELECT * FROM employees WHERE active = TRUE;
DROP VIEW IF EXISTS active_staff;
Data Manipulation Language
INSERT
INSERT INTO employees (name, salary) VALUES ('Alice', 90000);
-- Multiple rows
INSERT INTO employees (name, salary) VALUES
('Bob', 85000), ('Carol', 92000);
-- Insert from query
INSERT INTO archive SELECT * FROM employees WHERE active = FALSE;
-- UPSERT
INSERT INTO employees (id, name, salary) VALUES (1, 'Alice', 95000)
ON CONFLICT (id) DO UPDATE SET salary = EXCLUDED.salary;
-- RETURNING
INSERT INTO employees (name) VALUES ('Dave') RETURNING id, name;
UPDATE
UPDATE employees SET salary = salary * 1.1 WHERE dept_id = 3;
-- UPDATE with FROM (join)
UPDATE employees e SET salary = salary + d.bonus
FROM departments d WHERE e.dept_id = d.id;
UPDATE employees SET active = FALSE WHERE id = 5 RETURNING *;
DELETE
DELETE FROM employees WHERE active = FALSE;
-- DELETE with USING (join)
DELETE FROM employees e USING departments d
WHERE e.dept_id = d.id AND d.name = 'Closed';
DELETE FROM employees WHERE id = 10 RETURNING id, name;
Queries
SELECT DISTINCT dept_id, COUNT(*) AS cnt,
AVG(salary) AS avg_sal
FROM employees
WHERE active = TRUE AND salary BETWEEN 50000 AND 150000
GROUP BY dept_id
HAVING COUNT(*) > 3
ORDER BY avg_sal DESC NULLS LAST
LIMIT 10 OFFSET 0;
JOINs
All standard JOIN types are supported. The DPccp optimizer automatically selects the optimal join order for 2+ tables.
-- INNER JOIN
SELECT e.name, d.name AS dept
FROM employees e INNER JOIN departments d ON e.dept_id = d.id;
-- LEFT / RIGHT / FULL OUTER JOIN
SELECT e.name, d.name
FROM employees e LEFT JOIN departments d ON e.dept_id = d.id;
-- CROSS JOIN
SELECT * FROM colors CROSS JOIN sizes;
-- Non-equality condition
SELECT * FROM events a JOIN events b
ON a.end_time >= b.start_time AND a.end_time <= b.end_time;
-- LATERAL subquery
SELECT d.name, top.name, top.salary
FROM departments d,
LATERAL (SELECT * FROM employees e
WHERE e.dept_id = d.id
ORDER BY salary DESC LIMIT 3) top;
Subqueries
-- IN subquery
SELECT * FROM employees
WHERE dept_id IN (SELECT id FROM departments WHERE budget > 100000);
-- EXISTS
SELECT * FROM departments d
WHERE EXISTS (SELECT 1 FROM employees e WHERE e.dept_id = d.id);
-- Scalar subquery
SELECT name, salary,
salary - (SELECT AVG(salary) FROM employees) AS diff
FROM employees;
-- Derived table
SELECT * FROM (SELECT dept_id, AVG(salary) AS avg_sal
FROM employees GROUP BY dept_id) sub
WHERE sub.avg_sal > 80000;
Common Table Expressions
-- Simple CTE
WITH dept_stats AS (
SELECT dept_id, AVG(salary) AS avg_sal
FROM employees GROUP BY dept_id
)
SELECT e.name, d.avg_sal
FROM employees e JOIN dept_stats d ON e.dept_id = d.dept_id;
-- Recursive CTE
WITH RECURSIVE tree AS (
SELECT id, name, 1 AS depth
FROM org WHERE parent_id IS NULL
UNION ALL
SELECT o.id, o.name, t.depth + 1
FROM org o JOIN tree t ON o.parent_id = t.id
)
SELECT * FROM tree ORDER BY depth;
Window Functions
-- Ranking
SELECT name, salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS rn,
RANK() OVER (ORDER BY salary DESC) AS rnk,
DENSE_RANK() OVER (ORDER BY salary DESC) AS drnk
FROM employees;
-- Running total with frame
SELECT sale_date, amount,
SUM(amount) OVER (ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running
FROM sales;
-- LAG / LEAD
SELECT name, salary,
LAG(salary, 1) OVER (ORDER BY salary) AS prev,
LEAD(salary, 1) OVER (ORDER BY salary) AS next
FROM employees;
-- Named window
SELECT name,
SUM(salary) OVER w,
AVG(salary) OVER w
FROM employees
WINDOW w AS (PARTITION BY dept_id ORDER BY hired_at);
Aggregates
-- Basic
SELECT dept_id,
COUNT(*), SUM(salary), AVG(salary),
MIN(salary), MAX(salary),
STDDEV(salary), VARIANCE(salary)
FROM employees GROUP BY dept_id;
-- FILTER clause
SELECT
COUNT(*) FILTER (WHERE status = 'active') AS active,
COUNT(*) FILTER (WHERE status = 'inactive') AS inactive
FROM users;
-- STRING_AGG and ARRAY_AGG
SELECT dept_id,
STRING_AGG(name, ', ' ORDER BY name) AS names,
ARRAY_AGG(salary) AS salaries
FROM employees GROUP BY dept_id;
-- Statistical: correlation, regression
SELECT CORR(revenue, ad_spend) AS r,
REGR_SLOPE(revenue, ad_spend) AS slope,
REGR_R2(revenue, ad_spend) AS r_squared
FROM campaigns;
-- Ordered set aggregates
SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) AS median,
MODE() WITHIN GROUP (ORDER BY dept_id) AS most_common_dept
FROM employees;
Set Operations
SELECT name FROM staff_2024
UNION ALL
SELECT name FROM staff_2025
ORDER BY name;
SELECT id FROM table_a
INTERSECT
SELECT id FROM table_b;
SELECT id FROM all_users
EXCEPT
SELECT id FROM blocked_users;
Data Types
| Type | Description |
|---|---|
| INTEGER, BIGINT | Signed integers (64-bit) |
| SERIAL, BIGSERIAL | Auto-incrementing integer (backed by sequence) |
| REAL, FLOAT, DOUBLE PRECISION | IEEE 754 floating point |
| NUMERIC(p,s) | Exact decimal with precision and scale |
| TEXT, VARCHAR(n) | Variable-length text |
| BOOLEAN | TRUE / FALSE |
| DATE | Calendar date (YYYY-MM-DD) |
| TIMESTAMP, TIMESTAMPTZ | Date and time (with optional timezone) |
| INTERVAL | Time duration |
| JSON, JSONB | JSON data (access via ->, ->>) |
| UUID | Universally unique identifier |
| BYTEA | Binary data |
| INTEGER[] | Array type |
| VECTOR(N) | N-dimensional vector (stored as JSON array) |
VECTOR(N) is a storage type, not an index directive. Vectors are stored in the document store. Create an HNSW index explicitly with CREATE INDEX ... USING hnsw for approximate nearest neighbor search, or use brute-force exact search without an index.
Scalar Functions
String
UPPER, LOWER, LENGTH, SUBSTRING, TRIM/LTRIM/RTRIM, CONCAT, CONCAT_WS, REPLACE, POSITION, LPAD/RPAD, REPEAT, REVERSE, SPLIT_PART, LEFT/RIGHT, INITCAP, TRANSLATE, OVERLAY, FORMAT, MD5, CHR/ASCII, STARTS_WITH, ENCODE/DECODE
Regex
REGEXP_MATCH, REGEXP_REPLACE, REGEXP_SPLIT_TO_ARRAY, REGEXP_SPLIT_TO_TABLE
Math
ABS, ROUND, CEIL/FLOOR, TRUNC, POWER/SQRT/CBRT, LOG/LN/EXP, MOD, SIGN, PI, RANDOM, DEGREES/RADIANS, GCD/LCM, FACTORIAL, WIDTH_BUCKET
Trigonometric: SIN, COS, TAN, ASIN, ACOS, ATAN, ATAN2
Date/Time
NOW(), CURRENT_DATE, CURRENT_TIMESTAMP, CLOCK_TIMESTAMP(), EXTRACT(field FROM col), DATE_TRUNC('unit', col), DATE_PART('field', col), AGE(), TO_CHAR/TO_DATE/TO_TIMESTAMP, MAKE_DATE/MAKE_TIMESTAMP/MAKE_INTERVAL, ISFINITE, OVERLAPS
Conditional
COALESCE, NULLIF, GREATEST, LEAST, CASE ... WHEN ... THEN ... ELSE ... END
Table Functions (FROM clause)
SELECT * FROM GENERATE_SERIES(1, 10);
SELECT * FROM GENERATE_SERIES('2024-01-01'::timestamp, '2024-12-01'::timestamp, '1 month');
SELECT * FROM UNNEST(ARRAY[1, 2, 3]);
SELECT * FROM REGEXP_SPLIT_TO_TABLE('a,b,c', ',');
JSON/JSONB
Operators
| Operator | Description | Example |
|---|---|---|
| -> | Get field as JSON | data->'name' |
| ->> | Get field as text | data->>'name' |
| #> | Get nested path as JSON | data#>'{a,b}' |
| #>> | Get nested path as text | data#>>'{a,b}' |
| @> | Contains | data @> '{"key":"val"}' |
| <@ | Contained by | '{"a":1}' <@ data |
| ? | Key exists | data ? 'key' |
Functions
SELECT JSON_BUILD_OBJECT('name', 'Alice', 'age', 30);
SELECT JSON_BUILD_ARRAY(1, 2, 'three');
SELECT JSON_TYPEOF('[1,2]'::jsonb); -- 'array'
SELECT JSON_ARRAY_LENGTH('[1,2,3]'); -- 3
SELECT * FROM JSON_EACH('{"a":1,"b":2}');
SELECT * FROM JSON_ARRAY_ELEMENTS('[1,2,3]');
SELECT JSONB_SET('{"a":1}'::jsonb, '{b}', '2');
Vector Search
UQA follows the pgvector model: VECTOR(N) is a storage type. Vectors are stored in the document store as JSON arrays. HNSW indexes are created explicitly.
-- Create table with vector column
CREATE TABLE papers (
id SERIAL PRIMARY KEY,
title TEXT,
embedding VECTOR(384)
);
-- Insert vectors
INSERT INTO papers (title, embedding) VALUES
('attention is all you need', ARRAY[0.1, 0.2, ...]);
-- Brute-force KNN (no index needed, exact results)
SELECT title, _score FROM papers
WHERE knn_match(embedding, ARRAY[0.1, 0.2, ...], 5)
ORDER BY _score DESC;
-- Create HNSW index for approximate (faster) search
CREATE INDEX idx_emb ON papers USING hnsw (embedding)
WITH (ef_construction = 200, m = 16);
-- Now knn_match uses HNSW (same query syntax)
SELECT title, _score FROM papers
WHERE knn_match(embedding, ARRAY[0.1, 0.2, ...], 10)
ORDER BY _score DESC;
-- Drop and recreate index (vectors remain in document store)
DROP INDEX idx_emb;
CREATE INDEX idx_emb ON papers USING hnsw (embedding);
Text Search
Full-text search with BM25 scoring, powered by an inverted index with configurable text analysis pipelines.
@@ Operator (Query String Mini-Language)
-- Boolean query: AND / OR / NOT
SELECT title, _score FROM articles
WHERE title @@ 'database AND query' ORDER BY _score DESC;
-- Phrase query
SELECT title, _score FROM articles
WHERE title @@ '"machine learning"' ORDER BY _score DESC;
-- Field targeting
SELECT title, _score FROM articles
WHERE _all @@ 'title:attention AND body:transformer'
ORDER BY _score DESC;
-- Hybrid text + vector fusion via @@
SELECT title, _score FROM articles
WHERE _all @@ 'body:search AND embedding:[0.1, 0.9, 0.0, 0.0]'
ORDER BY _score DESC;
Function-Based Search
-- BM25 full-text search
SELECT title, _score FROM papers
WHERE text_match(title, 'attention transformer')
ORDER BY _score DESC;
-- Bayesian BM25 (calibrated probability in [0,1])
SELECT title, _score FROM papers
WHERE bayesian_match(title, 'deep learning')
ORDER BY _score DESC;
-- Create a custom analyzer with stemming
SELECT * FROM create_analyzer('english', '{
"tokenizer": {"type": "standard"},
"token_filters": [
{"type": "lowercase"},
{"type": "stop", "language": "english"},
{"type": "porter_stem"}
]
}');
-- FROM-clause text search
SELECT * FROM text_search('attention', 'title', 'papers');
Graph Queries
Apache AGE-compatible openCypher integration for property graph queries on named graphs.
-- Create a named graph
SELECT * FROM create_graph('social');
-- Create vertices and edges
SELECT * FROM cypher('social', $$
CREATE (a:Person {name: 'Alice', age: 30})
-[:KNOWS]->
(b:Person {name: 'Bob', age: 25})
RETURN a.name, b.name
$$) AS (a agtype, b agtype);
-- Pattern matching with WHERE
SELECT * FROM cypher('social', $$
MATCH (p:Person)-[:KNOWS]->(friend:Person)
WHERE p.age > 25
RETURN p.name, friend.name
ORDER BY p.name
$$) AS (person agtype, friend agtype);
-- Variable-length paths
SELECT * FROM cypher('social', $$
MATCH (a:Person {name: 'Alice'})-[:KNOWS*1..3]->(remote)
RETURN remote.name
$$) AS (name agtype);
-- Graph traversal (SQL functions)
SELECT * FROM traverse(1, 'cited_by', 2);
SELECT * FROM rpq('cited_by/cited_by', 1);
-- Bounded RPQ: paths of 2-3 hops
SELECT * FROM rpq('knows{2,3}', 1);
Centrality
-- Graph centrality (FROM clause)
SELECT _doc_id, title, _score FROM pagerank() ORDER BY _score DESC;
SELECT _doc_id, title, _score FROM hits() ORDER BY _score DESC;
SELECT _doc_id, title, _score FROM betweenness() ORDER BY _score DESC;
-- Centrality on named graph
SELECT _doc_id, _score FROM pagerank('graph:social') ORDER BY _score DESC;
-- Centrality as WHERE signal
SELECT title, _score FROM papers WHERE pagerank() AND year >= 2020;
-- Weighted path query
SELECT title, _score FROM papers
WHERE weighted_rpq('cites/cites', 7, 'weight', 'sum', 6.0);
-- Graph mutation via SQL
SELECT * FROM graph_add_vertex(1, 'person', 'employees');
SELECT * FROM graph_add_edge(1, 1, 2, 'knows', 'employees', 'weight=0.8');
Multi-Signal Fusion
Combine relevance signals from different paradigms (text, vector, graph) into a single ranked result.
-- Log-odds fusion: text + vector + graph
SELECT title, _score FROM papers
WHERE fuse_log_odds(
text_match(title, 'attention'),
knn_match(embedding, ARRAY[0.1, 0.2, ...], 5),
traverse_match(1, 'cited_by', 2)
)
ORDER BY _score DESC;
-- Probabilistic AND / OR / NOT
SELECT title, _score FROM papers
WHERE fuse_prob_and(
bayesian_match(title, 'neural'),
knn_match(embedding, ARRAY[...], 10)
)
ORDER BY _score DESC;
Gating
-- Log-odds with ReLU gating
SELECT title, _score FROM papers
WHERE fuse_log_odds(
text_match(title, 'attention'),
pagerank(),
'relu'
) ORDER BY _score DESC;
-- Progressive fusion: cascading WAND
SELECT title, _score FROM papers
WHERE progressive_fusion(
text_match(title, 'attention'),
traverse_match(1, 'cites', 2), 5,
pagerank(), 3
) ORDER BY _score DESC;
EXPLAIN SELECT title, _score FROM papers
WHERE fuse_log_odds(
text_match(title, 'attention'),
knn_match(embedding, ARRAY[...], 5)
);
Geospatial
R*Tree spatial index with Haversine great-circle distance for POINT columns.
-- Create table with POINT column
CREATE TABLE restaurants (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
cuisine TEXT NOT NULL,
location POINT
);
-- Create R*Tree spatial index
CREATE INDEX idx_loc ON restaurants USING rtree (location);
-- Range query with Haversine distance
SELECT name,
ROUND(ST_Distance(location, POINT(-73.9857, 40.7484)), 0) AS dist_m
FROM restaurants
WHERE spatial_within(location, POINT(-73.9857, 40.7484), 5000)
ORDER BY dist_m;
-- Spatial + text + vector fusion
SELECT name, _score FROM restaurants
WHERE fuse_log_odds(
text_match(description, 'pizza'),
spatial_within(location, POINT(-73.9969, 40.7306), 3000),
knn_match(embedding, $1, 5)
) ORDER BY _score DESC;
Spatial Functions
| Function | Description |
|---|---|
| ST_Distance(p1, p2) | Haversine great-circle distance in meters |
| ST_Within(p1, p2, dist) | Distance predicate (boolean) |
| ST_DWithin(p1, p2, dist) | Alias for ST_Within |
| POINT(x, y) | Construct a POINT value (longitude, latitude) |
| spatial_within(field, point, dist) | WHERE-clause geospatial range query (R*Tree + Haversine) |
Temporal Graph
Time-aware graph operations, GNN-style message passing, and structural embeddings.
-- Temporal traversal: edges valid at timestamp
SELECT * FROM temporal_traverse(1, 'knows', 2, 1700000000);
-- GNN message passing: aggregate neighbor properties
SELECT * FROM papers
WHERE message_passing(2, 'mean', 'citations');
-- Structural graph embeddings
SELECT * FROM papers
WHERE graph_embedding(16, 3);
Temporal / GNN Functions
| Function | Description |
|---|---|
| temporal_traverse(start, lbl, hops, ts) | Time-aware graph traversal |
| message_passing(k, agg, property) | GNN k-layer neighbor aggregation |
| graph_embedding(dims, k) | Structural graph embeddings |
Foreign Data Wrappers
Query external data sources (Parquet, CSV, JSON, remote servers) as if they were local tables.
-- Register a DuckDB-backed server
CREATE SERVER warehouse FOREIGN DATA WRAPPER duckdb_fdw;
-- Map a Parquet directory with Hive partitioning
CREATE FOREIGN TABLE sales (
id INTEGER, name TEXT, amount INTEGER,
year INTEGER, month INTEGER
) SERVER warehouse OPTIONS (
source '/data/sales/**/*.parquet',
hive_partitioning 'true'
);
-- Predicates pushed down to DuckDB for partition pruning
SELECT name, SUM(amount) FROM sales
WHERE year IN (2024, 2025) AND month > 6
GROUP BY name;
-- Arrow Flight SQL for remote servers
CREATE SERVER remote FOREIGN DATA WRAPPER arrow_fdw
OPTIONS (host 'flights.example.com', port '8815');
-- Full SQL works on foreign tables
SELECT s.name, e.dept
FROM sales s JOIN employees e ON s.rep_id = e.id
ORDER BY s.amount DESC LIMIT 10;
Full Query Pushdown (v0.19.0)
When all tables in a query are foreign tables on the same DuckDB server, the entire query is pushed to DuckDB — no Python row materialization. JOINs, aggregates, window functions, subqueries, and CTEs all execute natively.
-- Entire query delegated to DuckDB (41M rows, instant)
SELECT payment_type, COUNT(*) AS trips,
AVG(total_amount) AS avg_total
FROM taxi_trips
GROUP BY payment_type
ORDER BY trips DESC;
-- Window functions also pushed down
SELECT name, value,
ROW_NUMBER() OVER (ORDER BY value DESC) AS rn
FROM foreign_data;
Mixed Foreign-Local Queries
Local dimension tables (<100K rows) are shipped to DuckDB as temp tables,
letting the full JOIN execute in DuckDB. If DuckDB cannot handle
UQA-specific functions (e.g., spatial_within), the standard
UQA operator pipeline takes over automatically.
-- Local taxi_zones (263 rows) shipped to DuckDB,
-- JOINed with foreign taxi_trips (41M rows) in DuckDB
SELECT z.zone, z.borough, COUNT(*) AS pickups
FROM taxi_trips t
JOIN taxi_zones z ON t.pu_location_id = z.location_id
GROUP BY z.zone, z.borough
ORDER BY pickups DESC LIMIT 10;
Python API
Engine
from uqa.engine import Engine
# In-memory
engine = Engine()
# Persistent (SQLite-backed)
engine = Engine(db_path="research.db")
# With tuning parameters
engine = Engine(
db_path="research.db",
parallel_workers=4, # ThreadPool concurrency
spill_threshold=100000 # Disk spill after 100K rows
)
# SQL interface
result = engine.sql("SELECT * FROM papers")
print(result.columns) # ['id', 'title', 'year']
print(result.rows) # [{'id': 1, 'title': '...', ...}]
# Arrow / Parquet export
arrow_table = result.to_arrow()
result.to_parquet("papers.parquet")
# Retrieve a document by ID
doc = engine.get_document(1, table="papers")
# Access a table's graph store
gs = engine.get_graph_store("papers")
# Get/set field analyzer
engine.set_table_analyzer("papers", "title", "english_stem")
analyzer = engine.get_table_analyzer("papers", "title")
# Context manager
with Engine(db_path="data.db") as engine:
    engine.sql("SELECT 1")
QueryBuilder (Fluent API)
from uqa.core.types import Equals, GreaterThanOrEqual
# Text search with BM25 scoring
result = (
engine.query(table="papers")
.term("attention", field="title")
.score_bm25("attention")
.execute()
)
# Boolean composition
q1 = engine.query(table="papers").term("neural", field="title")
q2 = engine.query(table="papers").term("network", field="title")
combined = q1.and_(q2)
# Filter + aggregate
result = (
engine.query(table="employees")
.filter("dept", Equals("Engineering"))
.aggregate("salary", "avg")
)
# Graph traversal
team = engine.query(table="org").traverse(ceo_id, "manages", max_hops=3)
total_salary = team.vertex_aggregate("salary", "sum")
# Multi-signal fusion
result = (
engine.query(table="papers")
.term("attention", field="title")
.score_bayesian_bm25("attention")
.knn(query_vector, k=10, field="embedding")
.fuse_log_odds()
.execute()
)
# Arrow export
arrow_table = engine.query(table="papers").term("ai", field="title").execute_arrow()
engine.query(table="papers").execute_parquet("results.parquet")
Text Analysis
from uqa.analysis import (
Analyzer, StandardTokenizer, LowerCaseFilter,
StopWordFilter, PorterStemFilter
)
# Build a custom analyzer
analyzer = Analyzer(
tokenizer=StandardTokenizer(),
token_filters=[
LowerCaseFilter(),
StopWordFilter(language="english"),
PorterStemFilter(),
],
)
tokens = analyzer.analyze("The quick brown foxes jumped over lazy dogs")
# ['quick', 'brown', 'fox', 'jump', 'lazi', 'dog']
# Register for reuse
from uqa.analysis import register_analyzer
register_analyzer("english_stem", analyzer)
# Or via SQL
engine.sql("""SELECT * FROM create_analyzer('english_stem', '{
"tokenizer": {"type": "standard"},
"token_filters": [{"type": "lowercase"}, {"type": "stop"}, {"type": "porter_stem"}]
}')""")
Transactions
BEGIN;
INSERT INTO accounts (name, balance) VALUES ('Alice', 1000);
SAVEPOINT sp1;
UPDATE accounts SET balance = balance - 100 WHERE name = 'Alice';
ROLLBACK TO SAVEPOINT sp1; -- undo the UPDATE
COMMIT; -- INSERT is committed
Python API
with engine.begin() as tx:
    engine.sql("INSERT INTO logs (msg) VALUES ('hello')")
    tx.savepoint("sp1")
    engine.sql("INSERT INTO logs (msg) VALUES ('world')")
    tx.rollback_to("sp1")
# only 'hello' is committed
EXPLAIN / ANALYZE
-- Show query plan
EXPLAIN SELECT * FROM employees WHERE salary > 80000;
-- Show plan with execution stats
EXPLAIN ANALYZE SELECT e.name, d.name
FROM employees e JOIN departments d ON e.dept_id = d.id;
-- Collect column statistics for the optimizer
ANALYZE employees;