Quick Start
Get started with the Stych API in minutes. Our API enables agentic dataset operations, including cleaning, linking, auditing, and entity recognition, all powered by local AI for complete privacy.
1. Get Your API Key
Email us to receive your API key.
2. Make Your First Request
Link records between two datasets with a simple API call:
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [{"id": 1, "name": "Apple Inc"}],
"dataset2": [{"id": 1, "company": "Apple Inc."}],
"match_x__vs__y": ["name__vs__company"],
"method": "QRatio"
}'
import requests
response = requests.post(
"https://api.conformal.io/v1/matching/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": [{"id": 1, "name": "Apple Inc"}],
"dataset2": [{"id": 1, "company": "Apple Inc."}],
"match_x__vs__y": ["name__vs__company"],
"method": "QRatio"
}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/matching/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json"
},
body: JSON.stringify({
dataset1: [{id: 1, name: "Apple Inc"}],
dataset2: [{id: 1, company: "Apple Inc."}],
match_x__vs__y: ["name__vs__company"],
method: "QRatio"
})
}
);
const data = await response.json();
Authentication
All API requests require authentication using an API key. Include your API key in the request header for every request.
Header Format
x-api-key: YOUR_API_KEY
Replace YOUR_API_KEY with your actual API key (see "Get Your API Key" in the Quick Start for how to obtain one).
API Endpoints
Stych API provides endpoints for synchronous matching, file uploads, and asynchronous job processing with AI-powered auditing.
/v1/matching/
Execute synchronous matching between two datasets. Returns results immediately. Best for datasets with fewer than 10,000 records.
Request Body
Example Request
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"confidence_threshold": 0.8,
"top_n": 3
}'
import requests
response = requests.post(
"https://api.conformal.io/v1/matching/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"confidence_threshold": 0.8,
"top_n": 3
}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/matching/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json"
},
body: JSON.stringify({
dataset1: [
{id: 1, name: "Apple Inc", address: "1 Apple Park Way"},
{id: 2, name: "Microsoft Corp", address: "One Microsoft Way"}
],
dataset2: [
{id: 1, company_name: "Apple Inc.", location: "Cupertino, CA"},
{id: 2, company_name: "Microsoft Corporation", location: "Redmond, WA"}
],
method: "similarity",
match_x__vs__y: ["name__vs__company_name", "address__vs__location"],
confidence_threshold: 0.8,
top_n: 3
})
}
);
const data = await response.json();
Response
{
"data": [
{
"0": {
"company_name": "Apple Inc.",
"location": "Cupertino, CA",
"confidence_name__vs__company_name": 0.95
},
"name": "Apple Inc",
"address": "1 Apple Park Way"
}
],
"messages": ["Matching completed successfully"],
"stats": {
"total_matches": 150,
"high_confidence": 142
}
}
/v1/matching/upload
Upload files directly for matching. Supports CSV, TSV, JSON, JSONL, Parquet, Excel (.xlsx, .xls), TXT, LOG, MD, and DBF formats. Files are automatically parsed and processed.
Request Body (multipart/form-data)
Example Request
# Using cURL with file upload
curl -X POST https://api.conformal.io/v1/matching/upload \
-H "x-api-key: YOUR_API_KEY" \
-F "dataset1_file=@customers.csv" \
-F "dataset2_file=@companies.csv" \
-F "method=similarity" \
-F 'match_x__vs__y=["name__vs__company_name"]' \
-F "top_n=3"
import requests
# Using requests with file upload
with open('customers.csv', 'rb') as f1, open('companies.csv', 'rb') as f2:
files = {
'dataset1_file': f1,
'dataset2_file': f2
}
data = {
'method': 'similarity',
'match_x__vs__y': '["name__vs__company_name"]',
'top_n': '3'
}
response = requests.post(
"https://api.conformal.io/v1/matching/upload",
headers={"x-api-key": "YOUR_API_KEY"},
files=files,
data=data
)
print(response.json())
// Using FormData for file upload
const formData = new FormData();
formData.append('dataset1_file', fileInput1.files[0]);
formData.append('dataset2_file', fileInput2.files[0]);
formData.append('method', 'similarity');
formData.append('match_x__vs__y', '["name__vs__company_name"]');
formData.append('top_n', '3');
const response = await fetch(
"https://api.conformal.io/v1/matching/upload",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
},
body: formData
}
);
const data = await response.json();
Response
{
"data": [
{
"0": {
"company_name": "Apple Inc.",
"confidence_name__vs__company_name": 0.95
},
"name": "Apple Inc"
}
],
"messages": ["Matching completed successfully"]
}
/v1/jobs/
Submit asynchronous matching jobs for large datasets. Includes optional AI-powered auditing to validate and improve match quality. Recommended for datasets with 10,000+ records.
Request Body
Example Request
curl -X POST https://api.conformal.io/v1/jobs/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"top_n": 3,
"enable_auditing": true,
"audit_models": ["qwen/qwen3-32b"]
}'
import requests
response = requests.post(
"https://api.conformal.io/v1/jobs/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"top_n": 3,
"enable_auditing": True,
"audit_models": ["qwen/qwen3-32b"]
}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json"
},
body: JSON.stringify({
dataset1: [
{id: 1, name: "Apple Inc", address: "1 Apple Park Way"},
{id: 2, name: "Microsoft Corp", address: "One Microsoft Way"}
],
dataset2: [
{id: 1, company_name: "Apple Inc.", location: "Cupertino, CA"},
{id: 2, company_name: "Microsoft Corporation", location: "Redmond, WA"}
],
method: "similarity",
match_x__vs__y: ["name__vs__company_name", "address__vs__location"],
top_n: 3,
enable_auditing: true,
audit_models: ["qwen/qwen3-32b"]
})
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-...",
"status": "pending",
"message": "Job submitted successfully.",
"submitted_at": "2025-01-15T10:30:00Z"
}
/v1/jobs/upload
Submit asynchronous matching jobs with file uploads. Supports the same file formats as /v1/matching/upload. All parameters are passed as form fields.
Request Body (multipart/form-data)
Example Request
# Using cURL with file upload
curl -X POST https://api.conformal.io/v1/jobs/upload \
-H "x-api-key: YOUR_API_KEY" \
-F "dataset1_file=@customers.csv" \
-F "dataset2_file=@companies.csv" \
-F "method=similarity" \
-F 'match_x__vs__y=["name__vs__company_name"]' \
-F "top_n=3" \
-F "enable_auditing=true" \
-F 'audit_models=["qwen/qwen3-32b"]'
import requests
# Using requests with file upload
with open('customers.csv', 'rb') as f1, open('companies.csv', 'rb') as f2:
files = {
'dataset1_file': f1,
'dataset2_file': f2
}
data = {
'method': 'similarity',
'match_x__vs__y': '["name__vs__company_name"]',
'top_n': '3',
'enable_auditing': 'true',
'audit_models': '["qwen/qwen3-32b"]'
}
response = requests.post(
"https://api.conformal.io/v1/jobs/upload",
headers={"x-api-key": "YOUR_API_KEY"},
files=files,
data=data
)
print(response.json())
// Using FormData for file upload
const formData = new FormData();
formData.append('dataset1_file', fileInput1.files[0]);
formData.append('dataset2_file', fileInput2.files[0]);
formData.append('method', 'similarity');
formData.append('match_x__vs__y', '["name__vs__company_name"]');
formData.append('top_n', '3');
formData.append('enable_auditing', 'true');
formData.append('audit_models', '["qwen/qwen3-32b"]');
const response = await fetch(
"https://api.conformal.io/v1/jobs/upload",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
},
body: formData
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-...",
"status": "pending",
"message": "Job submitted successfully.",
"submitted_at": "2025-01-15T10:30:00Z"
}
GET /v1/jobs/{request_id}
Check the status of an asynchronous job and retrieve results when complete.
Path Parameters
Example Request
curl -X GET https://api.conformal.io/v1/jobs/abc123-def456-ghi789 \
-H "x-api-key: YOUR_API_KEY"
import requests
response = requests.get(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
headers={"x-api-key": "YOUR_API_KEY"}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
{
method: "GET",
headers: {
"x-api-key": "YOUR_API_KEY"
}
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-ghi789",
"status": "completed",
"submitted_at": "2025-01-15T10:30:00Z",
"completed_at": "2025-01-15T10:35:00Z",
"results": {
"data": [...],
"stats": {
"total_matches": 150
}
}
}
Status values: pending, processing, matching, auditing, completed, failed, cancelled
/v1/jobs/{request_id}/cancel
Cancel a running job. Jobs with status "pending", "processing", "matching", or "auditing" can be cancelled. Jobs that are "completed", "failed", or "cancelled" cannot be cancelled.
Path Parameters
Example Request
curl -X POST https://api.conformal.io/v1/jobs/abc123-def456-ghi789/cancel \
-H "x-api-key: YOUR_API_KEY"
import requests
response = requests.post(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789/cancel",
headers={"x-api-key": "YOUR_API_KEY"}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789/cancel",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
}
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-ghi789",
"status": "cancelled",
"message": "Job cancelled successfully",
"cancelled_at": "2025-01-15T10:32:00Z"
}
DELETE /v1/jobs/{request_id}
Permanently delete a job and all associated results. This action cannot be undone.
Path Parameters
Example Request
curl -X DELETE https://api.conformal.io/v1/jobs/abc123-def456-ghi789 \
-H "x-api-key: YOUR_API_KEY"
import requests
response = requests.delete(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
headers={"x-api-key": "YOUR_API_KEY"}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
{
method: "DELETE",
headers: {
"x-api-key": "YOUR_API_KEY"
}
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-ghi789",
"message": "Job deleted successfully",
"deleted_at": "2025-01-15T10:40:00Z"
}
Matching Algorithms
Choose the right algorithm for your use case. Each algorithm has different strengths in terms of speed, accuracy, and dataset size.
similarity
Best for large datasets (6,000+ records). Uses vector embeddings for semantic matching powered by local AI models.
- Semantic understanding
- Handles typos & variations
- Best accuracy
QRatio
Quick fuzzy matching with an excellent balance of speed and accuracy. Default choice for most use cases.
- Fast processing
- Good accuracy
- Low memory usage
WRatio
Weighted ratio that automatically chooses the best fuzzy matching method based on string characteristics.
- Adaptive matching
- Handles various formats
- Good for mixed data
token_sort_ratio
Handles word order differences. Perfect when the same words appear in different orders.
- Order-independent
- "ABC Co" = "Co ABC"
- Good for names/addresses
partial_ratio
Matches substrings, ideal when one string is a subset of another.
- Substring matching
- "Company Inc" vs "Company"
- Good for abbreviations
ratio
Simple Levenshtein distance ratio. Basic fuzzy matching algorithm.
- Basic string similarity
- 0-100 scale
- Fastest fuzzy method
token_set_ratio
Compares unique words, best for handling duplicated words in strings.
- Set-based comparison
- Handles duplicates
- Good for varied formats
token_ratio
Smart combination of token_sort_ratio and token_set_ratio for balanced matching.
- Combines token methods
- Balanced approach
- Good general purpose
partial_token_*
Partial matching combined with token-based methods for maximum flexibility.
- partial_token_sort_ratio
- partial_token_set_ratio
- partial_token_ratio
Use Cases
Stych API powers agentic data operations across various domains. Here are common use cases:
Record Linkage
Link customer records, product catalogs, or entity databases across multiple sources with high accuracy.
Data Deduplication
Identify and merge duplicate records within a single dataset, cleaning your data automatically.
Entity Recognition
Recognize and resolve entities across datasets, linking mentions to canonical records.
Data Auditing
Use AI-powered auditing to validate match quality and get detailed insights into your data.
Data Integration
Integrate data from multiple sources, creating unified views of your information.
Master Data Management
Maintain master data records by linking and deduplicating across systems.
Code Examples
Complete examples in multiple programming languages to help you get started quickly.
Basic Matching Example
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [{"id": 1, "name": "Apple Inc"}],
"dataset2": [{"id": 1, "company": "Apple Inc."}],
"match_x__vs__y": ["name__vs__company"],
"method": "QRatio"
}'
import requests
import pandas as pd
# Load datasets
df1 = pd.read_csv("customers.csv")
df2 = pd.read_csv("companies.csv")
# Convert to records
dataset1 = df1.to_dict("records")
dataset2 = df2.to_dict("records")
# Match records
response = requests.post(
"https://api.conformal.io/v1/matching/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": dataset1,
"dataset2": dataset2,
"method": "similarity",
"match_x__vs__y": ["customer_name__vs__company_name"],
"confidence_threshold": 0.85,
"top_n": 3
}
)
results = response.json()
print(f"Found {len(results['data'])} matches")
const axios = require('axios');
const fs = require('fs');
// Read datasets
const dataset1 = JSON.parse(fs.readFileSync('dataset1.json', 'utf8'));
const dataset2 = JSON.parse(fs.readFileSync('dataset2.json', 'utf8'));
// Async function to match records
/**
 * Send both datasets to the synchronous matching endpoint and log the outcome.
 *
 * Relies on `axios`, `dataset1` and `dataset2` being defined by the
 * surrounding example code. Failures are logged rather than rethrown, so the
 * returned promise always resolves.
 *
 * @returns {Promise<void>}
 */
async function matchRecords() {
  try {
    // Everything stays inside the try so that any failure while building
    // the request is reported the same way as a failed HTTP call.
    const payload = {
      dataset1,
      dataset2,
      method: 'QRatio',
      match_x__vs__y: ['name__vs__company'],
      confidence_threshold: 0.8,
    };
    const requestOptions = {
      headers: { 'x-api-key': 'YOUR_API_KEY' },
    };

    const result = await axios.post(
      'https://api.conformal.io/v1/matching/',
      payload,
      requestOptions
    );
    console.log('Matches:', result.data);
  } catch (error) {
    // Prefer the response body attached to the error when there is one;
    // otherwise fall back to the error's own message.
    const details = error.response?.data || error.message;
    console.error('Error:', details);
  }
}
matchRecords();
Async Job Example
# Submit async job
curl -X POST https://api.conformal.io/v1/jobs/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [...],
"dataset2": [...],
"method": "similarity",
"match_x__vs__y": ["name__vs__company"],
"enable_auditing": true,
"audit_models": ["qwen/qwen3-32b"],
"top_n": 3
}'
# Check job status
curl -X GET https://api.conformal.io/v1/jobs/{request_id} \
-H "x-api-key: YOUR_API_KEY"
import requests
import time
# Submit async job with AI auditing
response = requests.post(
"https://api.conformal.io/v1/jobs/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": dataset1,
"dataset2": dataset2,
"method": "similarity",
"match_x__vs__y": ["name__vs__company"],
"enable_auditing": True,
"audit_models": ["qwen/qwen3-32b"],
"top_n": 3
}
)
job = response.json()
request_id = job["request_id"]
# Poll for completion
while True:
status_response = requests.get(
f"https://api.conformal.io/v1/jobs/{request_id}",
headers={"x-api-key": "YOUR_API_KEY"}
)
status = status_response.json()
if status["status"] == "completed":
print("Job completed!")
print(status["results"])
break
elif status["status"] in ["failed", "cancelled"]:
print(f"Job {status['status']}:", status.get("error_message", status.get("message")))
break
time.sleep(5) # Poll every 5 seconds
const axios = require('axios');
// Submit async job with AI auditing
/**
 * Submit an asynchronous matching job with AI auditing, then poll its status
 * until it reaches a terminal state.
 *
 * Relies on `axios`, `dataset1` and `dataset2` being defined by the
 * surrounding example code. Request failures are logged rather than rethrown,
 * so the returned promise always resolves and the bare `submitJob();` call
 * cannot produce an unhandled rejection.
 *
 * @param {object} [options]
 * @param {number} [options.pollIntervalMs=5000] Delay between status checks, in milliseconds.
 * @param {number} [options.maxPolls=Infinity] Maximum number of status checks
 *   before giving up. The default keeps polling until the job finishes.
 * @returns {Promise<void>}
 */
async function submitJob({ pollIntervalMs = 5000, maxPolls = Infinity } = {}) {
  // The same headers are sent with the submit call and every status check.
  const requestConfig = {
    headers: {
      'x-api-key': 'YOUR_API_KEY',
    },
  };

  try {
    const response = await axios.post(
      'https://api.conformal.io/v1/jobs/',
      {
        dataset1,
        dataset2,
        method: 'similarity',
        match_x__vs__y: ['name__vs__company'],
        enable_auditing: true,
        audit_models: ['qwen/qwen3-32b'],
        top_n: 3,
      },
      requestConfig
    );
    const requestId = response.data.request_id;
    // Encode the id so it is always a single, safe path segment.
    const statusUrl = `https://api.conformal.io/v1/jobs/${encodeURIComponent(requestId)}`;

    // Poll for completion.
    for (let poll = 1; poll <= maxPolls; poll += 1) {
      const { data: status } = await axios.get(statusUrl, requestConfig);

      if (status.status === 'completed') {
        console.log('Job completed!');
        console.log(status.results);
        return;
      }
      if (['failed', 'cancelled'].includes(status.status)) {
        console.error(`Job ${status.status}:`, status.error_message || status.message);
        return;
      }

      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    }

    console.error(`Job ${requestId} still running after ${maxPolls} status checks; giving up.`);
  } catch (error) {
    // Same reporting style as the basic matching example: prefer the response
    // body attached to the error, else fall back to the error message.
    console.error('Error:', error.response?.data || error.message);
  }
}
submitJob();
Local AI & Privacy
How It Works
Conformal AI's Stych API runs entirely on local AI models. When you enable auditing or use the similarity algorithm, all processing happens on-premises or in your private cloud infrastructure.
- No data transmission: Your datasets remain in your infrastructure
- Local AI models: All AI models run locally (Qwen, GPT, etc.)
- Complete control: You control all data processing and storage
- Compliance ready: Meets the strictest privacy and security requirements
Available Local AI Models
Best Practices
Choosing the Right Algorithm
- Use similarity for datasets with 6,000+ records or when you need semantic understanding
- Use QRatio for smaller datasets or when speed is critical
- Use token_sort_ratio when word order varies (e.g., "John Smith" vs "Smith, John")
- Use partial_ratio when one string might be a substring of another
Optimizing Performance
- Use async jobs (/v1/jobs/) for datasets larger than 10,000 records
- Set appropriate confidence thresholds to filter low-quality matches
- Use "auto" for match_x__vs__y to let the API detect column pairs automatically
- Enable auditing for critical datasets to improve match quality
Data Preparation
- Clean and normalize data before matching (remove extra spaces, standardize formats)
- Include multiple fields in matching for better accuracy
- Use consistent ID fields to track records across datasets
- Consider data quality - better input data leads to better matches
Error Handling
The API uses standard HTTP status codes. Always check the response status and handle errors appropriately.
Common Error Codes
# Check HTTP status code
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{...}' \
-w "\nHTTP Status: %{http_code}\n"
# Handle rate limiting (429)
# Check Retry-After header and wait before retrying
# Python error handling example
try:
response = requests.post(url, headers=headers, json=data)
response.raise_for_status() # Raises exception for 4xx/5xx
results = response.json()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 429:
retry_after = e.response.headers.get('Retry-After')
print(f"Rate limited. Retry after {retry_after} seconds")
else:
print(f"Error: {e.response.json()}")
// JavaScript error handling example
try {
const response = await fetch(url, {
method: 'POST',
headers: headers,
body: JSON.stringify(data)
});
if (!response.ok) {
if (response.status === 429) {
const retryAfter = response.headers.get('Retry-After');
console.log(`Rate limited. Retry after ${retryAfter} seconds`);
} else {
const error = await response.json();
console.error('Error:', error);
}
} else {
const results = await response.json();
}
} catch (error) {
console.error('Request failed:', error);
}
Rate Limits
Rate limits ensure fair usage and system stability. Limits vary by plan and endpoint type.
API responses include X-RateLimit-Limit, X-RateLimit-Remaining, and X-RateLimit-Reset headers to help you manage rate limits.
Response Format
All successful responses follow a consistent format. Here's a detailed example:
{
"data": [
{
"0": {
"company_name": "Apple Inc.",
"location": "Apple Park, Cupertino, California",
"confidence_name__vs__company_name": 0.95,
"confidence_address__vs__location": 0.87
},
"1": {
"company_name": "Apple Corporation",
"location": "Cupertino, CA",
"confidence_name__vs__company_name": 0.82
},
"name": "Apple Inc",
"address": "1 Apple Park Way, Cupertino, CA"
}
],
"messages": ["Matching completed successfully"],
"stats": {
"total_matches": 150,
"high_confidence": 142,
"processing_time_ms": 1250
}
}