1- from datetime import date , datetime
1+ from datetime import date , datetime , timezone
22from decimal import Decimal
33from pathlib import Path
44from typing import Any , BinaryIO , Dict , List , Literal , Optional , Union
55
6- from pydantic import BaseModel , ConfigDict , Field , field_validator , model_validator
6+ from pydantic import BaseModel , ConfigDict , Field , PrivateAttr , field_validator , model_validator
77
88
99def _reconstruct_metadata_types (metadata : Dict [str , Any ], metadata_types : Dict [str , str ]) -> Dict [str , Any ]:
@@ -74,6 +74,8 @@ class Document(BaseModel):
7474
7575 # Client reference for update methods
7676 _client = None
77+ # When this document snapshot was pulled from the server (UTC), set at construction.
78+ _fetched_at : Optional [datetime ] = PrivateAttr (default_factory = lambda : datetime .now (timezone .utc ))
7779
7880 @model_validator (mode = "after" )
7981 def _reconstruct_types (self ) -> "Document" :
@@ -85,38 +87,96 @@ def _reconstruct_types(self) -> "Document":
8587
@property
def status(self) -> Dict[str, Any]:
    """Processing status of the document **as of when it was fetched** (a snapshot).

    This reads the status already carried on the document (in ``system_metadata``)
    and does **not** make a network call. The returned dict always has the same
    keys, whether or not a status was loaded:

    - ``document_id``: this document's ``external_id``
    - ``status``: the value stored under ``system_metadata["status"]``, or
      ``"unknown"`` when no status is present locally
    - ``error``: ``system_metadata["error"]`` when a status is present, else ``None``
    - ``created_at`` / ``updated_at``: taken from ``system_metadata`` when present,
      else ``None``
    - ``as_of``: ISO-8601 form of ``_fetched_at``; when that is unset, falls back to
      ``updated_at`` (status loaded) or ``None`` (status not loaded)
    - ``source``: ``"local"`` (status read from the document) or ``"not_loaded"``

    If the document was fetched without its status (e.g. a field projection that
    excluded it), this reports ``status="unknown"`` / ``source="not_loaded"`` rather
    than silently issuing a per-document API call. For the *current* live status,
    call :meth:`refresh` or :meth:`wait_for_completion` instead of re-reading this
    property in a loop.

    Returns:
        Dict[str, Any]: Snapshot status information as described above.
    """
    sm = self.system_metadata or {}
    has_status = "status" in sm
    fetched_at = self._fetched_at

    if fetched_at is not None:
        as_of = fetched_at.isoformat()
    else:
        # Without a fetch timestamp, the server's updated_at is the best local proxy,
        # but only when a status was actually loaded with this snapshot.
        as_of = sm.get("updated_at") if has_status else None

    # Both outcomes share one shape so callers can index any documented key
    # without first checking ``source``.
    return {
        "document_id": self.external_id,
        "status": sm.get("status") if has_status else "unknown",
        "error": sm.get("error") if has_status else None,
        "created_at": sm.get("created_at"),
        "updated_at": sm.get("updated_at"),
        "as_of": as_of,
        "source": "local" if has_status else "not_loaded",
    }
98128
@property
def is_processing(self) -> bool:
    """Whether the snapshot status of this document is ``"processing"``.

    Evaluates the ``"status"`` entry of :attr:`status`; any other value, or a
    missing entry, yields ``False``.
    """
    current_state = self.status.get("status")
    return current_state == "processing"
103137
@property
def is_ingested(self) -> bool:
    """Whether the snapshot status of this document is ``"completed"``.

    Evaluates the ``"status"`` entry of :attr:`status`; any other value, or a
    missing entry, yields ``False``.
    """
    current_state = self.status.get("status")
    return current_state == "completed"
108146
@property
def is_failed(self) -> bool:
    """Whether the snapshot status of this document is ``"failed"``.

    Evaluates the ``"status"`` entry of :attr:`status`; any other value, or a
    missing entry, yields ``False``.
    """
    current_state = self.status.get("status")
    return current_state == "failed"
113155
@property
def error(self) -> Optional[str]:
    """Return the snapshot's error message, but only for a failed document.

    Reads :attr:`status` once. When its ``"status"`` entry is not ``"failed"``
    the result is ``None``; otherwise it is whatever the ``"error"`` entry holds
    (``None`` if that entry is absent).
    """
    snapshot = self.status
    if snapshot.get("status") != "failed":
        return None
    return snapshot.get("error")
119161
def refresh(self) -> "Document":
    """Fetch this document again through the attached client and return the result.

    The current object is not modified; the value returned by the client's
    ``get_document(external_id)`` call is handed back, so callers should rebind
    (``doc = doc.refresh()``) to work with the newer snapshot.

    Returns:
        Document: Whatever ``get_document`` returns for this ``external_id``.

    Raises:
        ValueError: If no client is attached to this document instance.
    """
    client = self._client
    if client is None:
        raise ValueError(
            "Document instance not connected to a client. Use a document returned from a Morphik client method."
        )
    return client.get_document(self.external_id)
179+
120180 def wait_for_completion (self , timeout_seconds = 300 , check_interval_seconds = 2 , progress_callback = None ):
121181 """Wait for document processing to complete.
122182
0 commit comments