Skip to content

Commit 0a50efc

Browse files
committed
fix: test AttributeError (base_url not _base_url), add build_ontology test, fix README raw_output reference
Made-with: Cursor
1 parent 1559867 commit 0a50efc

3 files changed

Lines changed: 36 additions & 18 deletions

File tree

README.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -362,20 +362,20 @@ See [SDK_TUTORIAL.md](SDK_TUTORIAL.md) for complete documentation of every servi
362362
Turn pipeline outputs into corpus-level knowledge graphs, ontologies, and structured datasets with incremental ingestion. Feed the output of any Latence pipeline into Dataset Intelligence to extract entities, resolve duplicates, build knowledge graphs with RotatE link prediction, and induce ontological concepts.
363363

364364
```python
365-
# Step 1: Run a pipeline to process documents
366-
job = client.pipeline.run(files=["doc1.pdf", "doc2.pdf", "doc3.pdf"])
367-
pkg = job.wait_for_completion()
368-
369-
# Step 2: Feed pipeline output into Dataset Intelligence
365+
# Dataset Intelligence consumes pipeline stage outputs.
366+
# Use the portal's Dataset Intelligence UI to upload pipeline results,
367+
# or submit programmatically via the SDK:
370368
di = client.experimental.dataset_intelligence_service
371-
result = di.run(input_data=pkg.raw_output, return_job=True)
372-
# Poll at /api/v1/pipeline/{result.job_id} for status
373369

374-
# Step 3: Append new documents later (incremental)
375-
new_pkg = client.pipeline.run(files=["doc4.pdf"]).wait_for_completion()
370+
# Create a new dataset from pipeline output (dict with stage keys)
371+
job = di.run(input_data=pipeline_output, return_job=True)
372+
print(f"Job submitted: {job.job_id}")
373+
# Poll status at GET /api/v1/pipeline/{job.job_id}
374+
375+
# Append new documents to an existing dataset
376376
delta = di.run(
377-
input_data=new_pkg.raw_output,
378-
dataset_id=result.dataset_id, # appends to existing dataset
377+
input_data=new_pipeline_output,
378+
dataset_id="ds_existing_id", # appends to existing dataset
379379
return_job=True,
380380
)
381381
```

docs/dataset_intelligence.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ job = client.experimental.dataset_intelligence_service.run(
1111
input_data=pipeline_output,
1212
return_job=True,
1313
)
14-
result = client.jobs.wait(job.job_id) # or poll /api/v1/pipeline/{job_id}
15-
print(result.status)
14+
print(f"Job submitted: {job.job_id}")
15+
# Poll status at GET /api/v1/pipeline/{job.job_id}
1616
```
1717

1818
> **Note:** Direct service APIs live under `client.experimental.*`. Dataset Intelligence requires pipeline output as input — run the [pipeline](pipelines.md) first, then feed its output here for corpus-level analysis.
@@ -76,6 +76,8 @@ result = client.experimental.dataset_intelligence_service.enrich(
7676
print(f"Tier: {result.tier}") # "tier1"
7777
```
7878

79+
> `enrich()` is synchronous-only (it does not accept `return_job`). If you need async submission — for example, for large payloads — use `run(return_job=True)` and set `tier="tier1"` via `config_overrides`.
80+
7981
## `client.experimental.dataset_intelligence_service.build_graph()`
8082

8183
Tier 2 — knowledge graph construction with entity resolution and RotatE link prediction.
@@ -114,6 +116,8 @@ for concept in result.data.get("concepts", []):
114116
print(f" {concept['label']} (level {concept.get('level', 0)})")
115117
```
116118

119+
> `build_ontology()` is synchronous-only (it does not accept `return_job`). If you need async submission — for example, for large payloads — use `run(return_job=True)` and set `tier="tier3"` via `config_overrides`.
120+
117121
---
118122

119123
## Input Data Format

tests/integration/test_di_e2e.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
2020
Run:
2121
LATENCE_API_KEY=lat_xxx \
22-
pytest tests/integration/test_di_e2e.py -v -s --timeout=600
22+
pytest tests/integration/test_di_e2e.py -v -s
2323
"""
2424

2525
import json
@@ -87,8 +87,8 @@ def pipeline_payload():
8787

8888
def _poll_pipeline_job(client: Latence, job_id: str) -> dict:
8989
"""Poll the pipeline endpoint until the job completes or times out."""
90-
base_url = client._client._base_url
91-
api_key = client._client._api_key
90+
base_url = client._client.base_url
91+
api_key = client._client.api_key
9292
url = f"{base_url}/api/v1/pipeline/{job_id}"
9393
headers = {"Authorization": f"Bearer {api_key}"}
9494

@@ -299,6 +299,20 @@ def test_build_graph_async(self, client, pipeline_payload):
299299
assert result.job_id.startswith("di_")
300300
print(f"[DI E2E] build_graph() job: {result.job_id}")
301301

302+
def test_build_ontology_submits(self, client, pipeline_payload):
303+
"""Tier 3: build_ontology() submits without error."""
304+
di = client.experimental.dataset_intelligence_service
305+
try:
306+
result = di.build_ontology(input_data=pipeline_payload)
307+
assert isinstance(result, DatasetIntelligenceResponse)
308+
assert result.tier == "tier3"
309+
print(f"[DI E2E] build_ontology() returned: tier={result.tier}, "
310+
f"credits={result.usage.credits}")
311+
except Exception as e:
312+
if "timeout" in str(e).lower() or "504" in str(e):
313+
pytest.skip(f"Tier3 sync call timed out (expected for large payloads): {e}")
314+
raise
315+
302316
def test_run_async(self, client, pipeline_payload):
303317
"""Full: run(return_job=True) returns a job."""
304318
di = client.experimental.dataset_intelligence_service
@@ -326,8 +340,8 @@ def test_payload_exceeds_threshold(self, pipeline_payload):
326340

327341
def test_presign_endpoint(self, client):
328342
"""The /api/v1/di/presign endpoint responds correctly."""
329-
base_url = client._client._base_url
330-
api_key = client._client._api_key
343+
base_url = client._client.base_url
344+
api_key = client._client.api_key
331345

332346
resp = httpx.post(
333347
f"{base_url}/api/v1/di/presign",

0 commit comments

Comments
 (0)