Quick Start
Get started with the Stych API in minutes. Our API enables agentic dataset operations — including cleaning, linking, auditing, and entity recognition — all powered by local AI for complete privacy.
1. Get Your API Key
Email us to receive your API key.
2. Make Your First Request
Link records between two datasets with a simple API call:
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [{"id": 1, "name": "Apple Inc"}],
"dataset2": [{"id": 1, "company": "Apple Inc."}],
"match_x__vs__y": ["name__vs__company"],
"method": "QRatio"
}'
import requests
response = requests.post(
"https://api.conformal.io/v1/matching/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": [{"id": 1, "name": "Apple Inc"}],
"dataset2": [{"id": 1, "company": "Apple Inc."}],
"match_x__vs__y": ["name__vs__company"],
"method": "QRatio"
}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/matching/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json"
},
body: JSON.stringify({
dataset1: [{id: 1, name: "Apple Inc"}],
dataset2: [{id: 1, company: "Apple Inc."}],
match_x__vs__y: ["name__vs__company"],
method: "QRatio"
})
}
);
const data = await response.json();
Authentication
All API requests require authentication using an API key. Include your API key in the request header for every request.
Header Format
Replace YOUR_API_KEY with the actual API key you received by email.
API Endpoints
Stych API provides endpoints for synchronous matching, file uploads, and asynchronous job processing with AI-powered auditing.
/v1/matching/
Execute synchronous matching between two datasets. Returns results immediately. Best for datasets with fewer than 10,000 records.
Request Body
Example Request
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"confidence_threshold": 0.8,
"top_n": 3
}'
import requests
response = requests.post(
"https://api.conformal.io/v1/matching/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"confidence_threshold": 0.8,
"top_n": 3
}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/matching/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json"
},
body: JSON.stringify({
dataset1: [
{id: 1, name: "Apple Inc", address: "1 Apple Park Way"},
{id: 2, name: "Microsoft Corp", address: "One Microsoft Way"}
],
dataset2: [
{id: 1, company_name: "Apple Inc.", location: "Cupertino, CA"},
{id: 2, company_name: "Microsoft Corporation", location: "Redmond, WA"}
],
method: "similarity",
match_x__vs__y: ["name__vs__company_name", "address__vs__location"],
confidence_threshold: 0.8,
top_n: 3
})
}
);
const data = await response.json();
Response
{
"data": [
{
"0": {
"company_name": "Apple Inc.",
"location": "Cupertino, CA",
"confidence_name__vs__company_name": 0.95
},
"name": "Apple Inc",
"address": "1 Apple Park Way"
}
],
"messages": ["Matching completed successfully"],
"stats": {
"total_matches": 150,
"high_confidence": 142
}
}
/v1/matching/upload
Upload files directly for matching. Supports CSV, TSV, JSON, JSONL, Parquet, Excel (.xlsx, .xls), TXT, LOG, MD, and DBF formats. Files are automatically parsed and processed.
Request Body (multipart/form-data)
Example Request
# Using cURL with file upload
curl -X POST https://api.conformal.io/v1/matching/upload \
-H "x-api-key: YOUR_API_KEY" \
-F "dataset1_file=@customers.csv" \
-F "dataset2_file=@companies.csv" \
-F "method=similarity" \
-F 'match_x__vs__y=["name__vs__company_name"]' \
-F "top_n=3"
import requests
# Using requests with file upload
with open('customers.csv', 'rb') as f1, open('companies.csv', 'rb') as f2:
files = {
'dataset1_file': f1,
'dataset2_file': f2
}
data = {
'method': 'similarity',
'match_x__vs__y': '["name__vs__company_name"]',
'top_n': '3'
}
response = requests.post(
"https://api.conformal.io/v1/matching/upload",
headers={"x-api-key": "YOUR_API_KEY"},
files=files,
data=data
)
print(response.json())
// Using FormData for file upload
const formData = new FormData();
formData.append('dataset1_file', fileInput1.files[0]);
formData.append('dataset2_file', fileInput2.files[0]);
formData.append('method', 'similarity');
formData.append('match_x__vs__y', '["name__vs__company_name"]');
formData.append('top_n', '3');
const response = await fetch(
"https://api.conformal.io/v1/matching/upload",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
},
body: formData
}
);
const data = await response.json();
Response
{
"data": [
{
"0": {
"company_name": "Apple Inc.",
"confidence_name__vs__company_name": 0.95
},
"name": "Apple Inc"
}
],
"messages": ["Matching completed successfully"]
}
/v1/jobs/
Submit asynchronous matching jobs for large datasets. Includes optional AI-powered auditing to validate and improve match quality. Recommended for datasets with 10,000+ records.
Request Body
Example Request
curl -X POST https://api.conformal.io/v1/jobs/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"top_n": 3,
"enable_auditing": true,
"audit_models": ["qwen/qwen3-32b"]
}'
import requests
response = requests.post(
"https://api.conformal.io/v1/jobs/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": [
{"id": 1, "name": "Apple Inc", "address": "1 Apple Park Way"},
{"id": 2, "name": "Microsoft Corp", "address": "One Microsoft Way"}
],
"dataset2": [
{"id": 1, "company_name": "Apple Inc.", "location": "Cupertino, CA"},
{"id": 2, "company_name": "Microsoft Corporation", "location": "Redmond, WA"}
],
"method": "similarity",
"match_x__vs__y": ["name__vs__company_name", "address__vs__location"],
"top_n": 3,
"enable_auditing": True,
"audit_models": ["qwen/qwen3-32b"]
}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json"
},
body: JSON.stringify({
dataset1: [
{id: 1, name: "Apple Inc", address: "1 Apple Park Way"},
{id: 2, name: "Microsoft Corp", address: "One Microsoft Way"}
],
dataset2: [
{id: 1, company_name: "Apple Inc.", location: "Cupertino, CA"},
{id: 2, company_name: "Microsoft Corporation", location: "Redmond, WA"}
],
method: "similarity",
match_x__vs__y: ["name__vs__company_name", "address__vs__location"],
top_n: 3,
enable_auditing: true,
audit_models: ["qwen/qwen3-32b"]
})
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-...",
"status": "pending",
"message": "Job submitted successfully.",
"submitted_at": "2025-01-15T10:30:00Z"
}
/v1/jobs/upload
Submit asynchronous matching jobs with file uploads. Supports the same file formats as /v1/matching/upload. All parameters are passed as form fields.
Request Body (multipart/form-data)
Example Request
# Using cURL with file upload
curl -X POST https://api.conformal.io/v1/jobs/upload \
-H "x-api-key: YOUR_API_KEY" \
-F "dataset1_file=@customers.csv" \
-F "dataset2_file=@companies.csv" \
-F "method=similarity" \
-F 'match_x__vs__y=["name__vs__company_name"]' \
-F "top_n=3" \
-F "enable_auditing=true" \
-F 'audit_models=["qwen/qwen3-32b"]'
import requests
# Using requests with file upload
with open('customers.csv', 'rb') as f1, open('companies.csv', 'rb') as f2:
files = {
'dataset1_file': f1,
'dataset2_file': f2
}
data = {
'method': 'similarity',
'match_x__vs__y': '["name__vs__company_name"]',
'top_n': '3',
'enable_auditing': 'true',
'audit_models': '["qwen/qwen3-32b"]'
}
response = requests.post(
"https://api.conformal.io/v1/jobs/upload",
headers={"x-api-key": "YOUR_API_KEY"},
files=files,
data=data
)
print(response.json())
// Using FormData for file upload
const formData = new FormData();
formData.append('dataset1_file', fileInput1.files[0]);
formData.append('dataset2_file', fileInput2.files[0]);
formData.append('method', 'similarity');
formData.append('match_x__vs__y', '["name__vs__company_name"]');
formData.append('top_n', '3');
formData.append('enable_auditing', 'true');
formData.append('audit_models', '["qwen/qwen3-32b"]');
const response = await fetch(
"https://api.conformal.io/v1/jobs/upload",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
},
body: formData
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-...",
"status": "pending",
"message": "Job submitted successfully.",
"submitted_at": "2025-01-15T10:30:00Z"
}
/v1/jobs/{request_id}
Check the status of an asynchronous job and retrieve results when complete.
Path Parameters
Example Request
curl -X GET https://api.conformal.io/v1/jobs/abc123-def456-ghi789 \
-H "x-api-key: YOUR_API_KEY"
import requests
response = requests.get(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
headers={"x-api-key": "YOUR_API_KEY"}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
{
method: "GET",
headers: {
"x-api-key": "YOUR_API_KEY"
}
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-ghi789",
"status": "completed",
"submitted_at": "2025-01-15T10:30:00Z",
"completed_at": "2025-01-15T10:35:00Z",
"results": {
"data": [...],
"stats": {
"total_matches": 150
}
}
}
Status values: pending, processing, matching, auditing, completed, failed, cancelled
/v1/jobs/{request_id}/cancel
Cancel a running job. Jobs with status "pending", "processing", "matching", or "auditing" can be cancelled. Jobs that are "completed", "failed", or "cancelled" cannot be cancelled.
Path Parameters
Example Request
curl -X POST https://api.conformal.io/v1/jobs/abc123-def456-ghi789/cancel \
-H "x-api-key: YOUR_API_KEY"
import requests
response = requests.post(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789/cancel",
headers={"x-api-key": "YOUR_API_KEY"}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789/cancel",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
}
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-ghi789",
"status": "cancelled",
"message": "Job cancelled successfully",
"cancelled_at": "2025-01-15T10:32:00Z"
}
/v1/jobs/{request_id} (DELETE)
Permanently delete a job and all associated results. This action cannot be undone.
Path Parameters
Example Request
curl -X DELETE https://api.conformal.io/v1/jobs/abc123-def456-ghi789 \
-H "x-api-key: YOUR_API_KEY"
import requests
response = requests.delete(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
headers={"x-api-key": "YOUR_API_KEY"}
)
print(response.json())
const response = await fetch(
"https://api.conformal.io/v1/jobs/abc123-def456-ghi789",
{
method: "DELETE",
headers: {
"x-api-key": "YOUR_API_KEY"
}
}
);
const data = await response.json();
Response
{
"request_id": "abc123-def456-ghi789",
"message": "Job deleted successfully",
"deleted_at": "2025-01-15T10:40:00Z"
}
/v1/analyze_data/
Ask questions about your datasets conversationally. This endpoint helps you explore and understand your data before matching, including automatic column detection for identifying similar columns between datasets.
Request Body
Supported Questions
- "What columns are similar?" or "Identify similar columns" - Detects matching columns between two datasets
- "What fields would be matched?" - Shows which columns would be matched during auto detection
- "What are the column types?" - Describes data types and characteristics of columns
- "Show me sample data" - Displays sample values from the datasets
- "What columns are unique?" - Identifies columns with high uniqueness (good for matching)
- "Describe the columns" - Provides detailed information about column structure and content
Example Request
# Analyze datasets to identify similar columns
curl -X POST https://api.conformal.io/v1/analyze_data/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"question": "What columns are similar in these two datasets?",
"dataset1_path": "s3://bucket/products.csv",
"dataset2_path": "s3://bucket/inventory.csv"
}'
# Or with file upload
curl -X POST https://api.conformal.io/v1/analyze_data/ \
-H "x-api-key: YOUR_API_KEY" \
-F "question=What columns are similar?" \
-F "dataset1_file=@products.csv" \
-F "dataset2_file=@inventory.csv"
import requests
# Analyze datasets to identify similar columns
response = requests.post(
"https://api.conformal.io/v1/analyze_data/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"question": "What columns are similar in these two datasets?",
"dataset1_path": "s3://bucket/products.csv",
"dataset2_path": "s3://bucket/inventory.csv"
}
)
result = response.json()
print(result["answer"]) # Conversational answer
print(result["detected_matches"]) # Detected column pairs
# Or with file upload
with open('products.csv', 'rb') as f1, open('inventory.csv', 'rb') as f2:
files = {
'dataset1_file': f1,
'dataset2_file': f2
}
data = {
'question': 'What columns are similar?'
}
response = requests.post(
"https://api.conformal.io/v1/analyze_data/",
headers={"x-api-key": "YOUR_API_KEY"},
files=files,
data=data
)
print(response.json())
// Analyze datasets to identify similar columns
const response = await fetch(
"https://api.conformal.io/v1/analyze_data/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json"
},
body: JSON.stringify({
question: "What columns are similar in these two datasets?",
dataset1_path: "s3://bucket/products.csv",
dataset2_path: "s3://bucket/inventory.csv"
})
}
);
const result = await response.json();
console.log(result.answer); // Conversational answer
console.log(result.detected_matches); // Detected column pairs
// Or with file upload
const formData = new FormData();
formData.append('question', 'What columns are similar?');
formData.append('dataset1_file', fileInput1.files[0]);
formData.append('dataset2_file', fileInput2.files[0]);
const uploadResponse = await fetch(
"https://api.conformal.io/v1/analyze_data/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
},
body: formData
}
);
const uploadResult = await uploadResponse.json();
Response
{
"answer": "I've analyzed your datasets and found 3 matching column pairs:\n\n1. field_0 (Products.csv) โ ITEM_NUM (inventory.DBF)\n2. field_1 (Products.csv) โ BRAND (inventory.DBF)\n3. field_2 (Products.csv) โ DESCRIPT (inventory.DBF)\n\nThese columns show strong similarity based on data patterns...",
"dataset1_info": {
"columns": ["field_0", "field_1", "field_2"],
"num_columns": 3,
"sample_data": {...},
"analysis": {
"unique_cols": ["field_0"],
"numeric_only_cols": ["field_0"],
"text_cols": ["field_1", "field_2"]
}
},
"dataset2_info": {
"columns": ["ITEM_NUM", "BRAND", "DESCRIPT"],
"num_columns": 3,
"sample_data": {...},
"analysis": {...}
},
"detected_matches": [
"field_0__vs__ITEM_NUM",
"field_1__vs__BRAND",
"field_2__vs__DESCRIPT"
]
}
Matching Algorithms
Choose the right algorithm for your use case. Each algorithm has different strengths in terms of speed, accuracy, and dataset size.
similarity
Best for large datasets (6,000+ records). Uses vector embeddings for semantic matching powered by local AI models.
- Semantic understanding
- Handles typos & variations
- Best accuracy
fabl
Bayesian probabilistic record linkage using Gibbs sampling. Provides posterior probabilities and credible intervals for uncertainty quantification.
- Statistical confidence measures
- Uncertainty quantification
- Best for probabilistic matching
QRatio
Quick fuzzy matching with excellent balance of speed and accuracy. Default choice for most use cases.
- Fast processing
- Good accuracy
- Low memory usage
WRatio
Weighted ratio that automatically chooses the best fuzzy matching method based on string characteristics.
- Adaptive matching
- Handles various formats
- Good for mixed data
token_sort_ratio
Handles word order differences. Perfect when the same words appear in different orders.
- Order-independent
- "ABC Co" = "Co ABC"
- Good for names/addresses
partial_ratio
Matches substrings, ideal when one string is a subset of another.
- Substring matching
- "Company Inc" vs "Company"
- Good for abbreviations
ratio
Simple Levenshtein distance ratio. Basic fuzzy matching algorithm.
- Basic string similarity
- 0-100 scale
- Fastest fuzzy method
token_set_ratio
Compares unique words, best for handling duplicated words in strings.
- Set-based comparison
- Handles duplicates
- Good for varied formats
token_ratio
Smart combination of token_sort_ratio and token_set_ratio for balanced matching.
- Combines token methods
- Balanced approach
- Good general purpose
partial_token_*
Partial matching combined with token-based methods for maximum flexibility.
- partial_token_sort_ratio
- partial_token_set_ratio
- partial_token_ratio
Use Cases
Stych API powers agentic data operations across various domains. Here are common use cases:
Record Linkage
Link customer records, product catalogs, or entity databases across multiple sources with high accuracy.
Data Deduplication
Identify and merge duplicate records within a single dataset, cleaning your data automatically.
Entity Recognition
Recognize and resolve entities across datasets, linking mentions to canonical records.
Data Auditing
Use AI-powered auditing to validate match quality and get detailed insights into your data.
Data Integration
Integrate data from multiple sources, creating unified views of your information.
Master Data Management
Maintain master data records by linking and deduplicating across systems.
Code Examples
Complete examples in multiple programming languages to help you get started quickly.
Basic Matching Example
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [{"id": 1, "name": "Apple Inc"}],
"dataset2": [{"id": 1, "company": "Apple Inc."}],
"match_x__vs__y": ["name__vs__company"],
"method": "QRatio"
}'
import requests
import pandas as pd
# Load datasets
df1 = pd.read_csv("customers.csv")
df2 = pd.read_csv("companies.csv")
# Convert to records
dataset1 = df1.to_dict("records")
dataset2 = df2.to_dict("records")
# Match records
response = requests.post(
"https://api.conformal.io/v1/matching/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": dataset1,
"dataset2": dataset2,
"method": "similarity",
"match_x__vs__y": ["customer_name__vs__company_name"],
"confidence_threshold": 0.85,
"top_n": 3
}
)
results = response.json()
print(f"Found {len(results['data'])} matches")
const axios = require('axios');
const fs = require('fs');
// Read datasets
const dataset1 = JSON.parse(fs.readFileSync('dataset1.json', 'utf8'));
const dataset2 = JSON.parse(fs.readFileSync('dataset2.json', 'utf8'));
// Async function to match records
async function matchRecords() {
try {
const response = await axios.post(
'https://api.conformal.io/v1/matching/',
{
dataset1: dataset1,
dataset2: dataset2,
method: 'QRatio',
match_x__vs__y: ['name__vs__company'],
confidence_threshold: 0.8
},
{
headers: {
'x-api-key': 'YOUR_API_KEY'
}
}
);
console.log('Matches:', response.data);
} catch (error) {
console.error('Error:', error.response?.data || error.message);
}
}
matchRecords();
Async Job Example
# Submit async job
curl -X POST https://api.conformal.io/v1/jobs/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"dataset1": [...],
"dataset2": [...],
"method": "similarity",
"match_x__vs__y": ["name__vs__company"],
"enable_auditing": true,
"audit_models": ["qwen/qwen3-32b"],
"top_n": 3
}'
# Check job status
curl -X GET https://api.conformal.io/v1/jobs/{request_id} \
-H "x-api-key: YOUR_API_KEY"
import requests
import time
# Submit async job with AI auditing
response = requests.post(
"https://api.conformal.io/v1/jobs/",
headers={"x-api-key": "YOUR_API_KEY"},
json={
"dataset1": dataset1,
"dataset2": dataset2,
"method": "similarity",
"match_x__vs__y": ["name__vs__company"],
"enable_auditing": True,
"audit_models": ["qwen/qwen3-32b"],
"top_n": 3
}
)
job = response.json()
request_id = job["request_id"]
# Poll for completion
while True:
status_response = requests.get(
f"https://api.conformal.io/v1/jobs/{request_id}",
headers={"x-api-key": "YOUR_API_KEY"}
)
status = status_response.json()
if status["status"] == "completed":
print("Job completed!")
print(status["results"])
break
elif status["status"] in ["failed", "cancelled"]:
print(f"Job {status['status']}:", status.get("error_message", status.get("message")))
break
time.sleep(5) # Poll every 5 seconds
const axios = require('axios');
// Submit async job with AI auditing
async function submitJob() {
const response = await axios.post(
'https://api.conformal.io/v1/jobs/',
{
dataset1: dataset1,
dataset2: dataset2,
method: 'similarity',
match_x__vs__y: ['name__vs__company'],
enable_auditing: true,
audit_models: ['qwen/qwen3-32b'],
top_n: 3
},
{
headers: {
'x-api-key': 'YOUR_API_KEY'
}
}
);
const job = response.data;
const requestId = job.request_id;
// Poll for completion
while (true) {
const statusResponse = await axios.get(
`https://api.conformal.io/v1/jobs/${requestId}`,
{
headers: {
'x-api-key': 'YOUR_API_KEY'
}
}
);
const status = statusResponse.data;
if (status.status === 'completed') {
console.log('Job completed!');
console.log(status.results);
break;
} else if (['failed', 'cancelled'].includes(status.status)) {
console.error(`Job ${status.status}:`, status.error_message || status.message);
break;
}
await new Promise(resolve => setTimeout(resolve, 5000)); // Poll every 5 seconds
}
}
submitJob();
Local AI & Privacy
How It Works
Conformal AI's Stych API runs entirely on local AI models. When you enable auditing or use the similarity algorithm, all processing happens on-premises or in your private cloud infrastructure.
- No data transmission: Your datasets remain in your infrastructure
- Local AI models: All AI models run locally (Qwen, GPT, etc.)
- Complete control: You control all data processing and storage
- Compliance ready: Meets strictest privacy and security requirements
Available Local AI Models
Best Practices
Choosing the Right Algorithm
- Use similarity for datasets with 6,000+ records or when you need semantic understanding
- Use fabl when you need probabilistic matching with uncertainty quantification and statistical confidence measures
- Use QRatio for smaller datasets or when speed is critical
- Use token_sort_ratio when word order varies (e.g., "John Smith" vs "Smith, John")
- Use partial_ratio when one string might be a substring of another
Optimizing Performance
- Use async jobs (/v1/jobs/) for datasets larger than 10,000 records
- Set appropriate confidence thresholds to filter low-quality matches
- Use "auto" for match_x__vs__y to let the API detect column pairs automatically
- Enable auditing for critical datasets to improve match quality
Data Preparation
- Clean and normalize data before matching (remove extra spaces, standardize formats)
- Include multiple fields in matching for better accuracy
- Use consistent ID fields to track records across datasets
- Consider data quality - better input data leads to better matches
Error Handling
The API uses standard HTTP status codes. Always check the response status and handle errors appropriately.
Common Error Codes
# Check HTTP status code
curl -X POST https://api.conformal.io/v1/matching/ \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{...}' \
-w "\nHTTP Status: %{http_code}\n"
# Handle rate limiting (429)
# Check Retry-After header and wait before retrying
# Python error handling example
try:
response = requests.post(url, headers=headers, json=data)
response.raise_for_status() # Raises exception for 4xx/5xx
results = response.json()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 429:
retry_after = e.response.headers.get('Retry-After')
print(f"Rate limited. Retry after {retry_after} seconds")
else:
print(f"Error: {e.response.json()}")
// JavaScript error handling example
try {
const response = await fetch(url, {
method: 'POST',
headers: headers,
body: JSON.stringify(data)
});
if (!response.ok) {
if (response.status === 429) {
const retryAfter = response.headers.get('Retry-After');
console.log(`Rate limited. Retry after ${retryAfter} seconds`);
} else {
const error = await response.json();
console.error('Error:', error);
}
} else {
const results = await response.json();
}
} catch (error) {
console.error('Request failed:', error);
}
Rate Limits
Rate limits ensure fair usage and system stability. Limits vary by plan and endpoint type.
Every response includes X-RateLimit-Limit, X-RateLimit-Remaining, and X-RateLimit-Reset headers to help you manage rate limits.
Response Format
All successful responses follow a consistent format. Here's a detailed example:
{
"data": [
{
"0": {
"company_name": "Apple Inc.",
"location": "Apple Park, Cupertino, California",
"confidence_name__vs__company_name": 0.95,
"confidence_address__vs__location": 0.87
},
"1": {
"company_name": "Apple Corporation",
"location": "Cupertino, CA",
"confidence_name__vs__company_name": 0.82
},
"name": "Apple Inc",
"address": "1 Apple Park Way, Cupertino, CA"
}
],
"messages": ["Matching completed successfully"],
"stats": {
"total_matches": 150,
"high_confidence": 142,
"processing_time_ms": 1250
}
}
Response Fields
Data Analysis & Column Detection
Use the /v1/analyze_data/ endpoint to explore your datasets conversationally before matching. This powerful tool helps you understand your data structure, identify similar columns, and make informed decisions about which fields to match.
Common Use Cases
Column Detection
Ask "What columns are similar?" or "Identify similar columns" to automatically detect matching column pairs between two datasets.
Data Exploration
Understand your data structure with questions like "What are the column types?" or "Show me sample data".
Match Planning
Ask "What fields would be matched?" to see which columns would be used during auto detection before submitting a job.
Example: Column Detection Workflow
# Step 1: Analyze datasets to identify similar columns
curl -X POST https://api.conformal.io/v1/analyze_data/ \
-H "x-api-key: YOUR_API_KEY" \
-F "question=What columns are similar in these two datasets?" \
-F "dataset1_file=@products.csv" \
-F "dataset2_file=@inventory.csv"
# Response includes detected_matches: ["field_1__vs__BRAND", "field_2__vs__DESCRIPT"]
# Step 2: Use detected matches in your matching job
curl -X POST https://api.conformal.io/v1/jobs/upload \
-H "x-api-key: YOUR_API_KEY" \
-F "dataset1_file=@products.csv" \
-F "dataset2_file=@inventory.csv" \
-F "method=fabl" \
-F 'match_x__vs__y=["field_1__vs__BRAND", "field_2__vs__DESCRIPT"]' \
-F "confidence_threshold=0.82"
import requests
import json  # needed to serialize detected_matches as a JSON array string

# Step 1: Analyze datasets to identify similar columns
with open('products.csv', 'rb') as f1, open('inventory.csv', 'rb') as f2:
    files = {
        'dataset1_file': f1,
        'dataset2_file': f2
    }
    data = {
        'question': 'What columns are similar in these two datasets?'
    }
    analysis_response = requests.post(
        "https://api.conformal.io/v1/analyze_data/",
        headers={"x-api-key": "YOUR_API_KEY"},
        files=files,
        data=data
    )

analysis = analysis_response.json()
print(analysis["answer"])
detected_matches = analysis["detected_matches"]
print(f"Detected matches: {detected_matches}")

# Step 2: Use detected matches in your matching job
with open('products.csv', 'rb') as f1, open('inventory.csv', 'rb') as f2:
    files = {
        'dataset1_file': f1,
        'dataset2_file': f2
    }
    data = {
        'method': 'fabl',
        # json.dumps yields a valid JSON array string, e.g. '["a__vs__b"]'.
        # str() would produce a Python repr with single quotes, which the
        # API cannot parse (compare the curl/JS examples, which send JSON).
        'match_x__vs__y': json.dumps(detected_matches),
        'confidence_threshold': '0.82'
    }
    job_response = requests.post(
        "https://api.conformal.io/v1/jobs/upload",
        headers={"x-api-key": "YOUR_API_KEY"},
        files=files,
        data=data
    )

print(job_response.json())
// Step 1: Analyze datasets to identify similar columns
const formData = new FormData();
formData.append('question', 'What columns are similar in these two datasets?');
formData.append('dataset1_file', fileInput1.files[0]);
formData.append('dataset2_file', fileInput2.files[0]);
const analysisResponse = await fetch(
"https://api.conformal.io/v1/analyze_data/",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
},
body: formData
}
);
const analysis = await analysisResponse.json();
console.log(analysis.answer);
const detectedMatches = analysis.detected_matches;
console.log(`Detected matches: ${detectedMatches}`);
// Step 2: Use detected matches in your matching job
const jobFormData = new FormData();
jobFormData.append('dataset1_file', fileInput1.files[0]);
jobFormData.append('dataset2_file', fileInput2.files[0]);
jobFormData.append('method', 'fabl');
jobFormData.append('match_x__vs__y', JSON.stringify(detectedMatches)); // Use detected matches
jobFormData.append('confidence_threshold', '0.82');
const jobResponse = await fetch(
"https://api.conformal.io/v1/jobs/upload",
{
method: "POST",
headers: {
"x-api-key": "YOUR_API_KEY"
},
body: jobFormData
}
);
const job = await jobResponse.json();
console.log(job);
Response Format
The analyze_data endpoint returns a conversational answer along with structured dataset information: