Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/usage/log.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Default level is 2 (CONTENT), which logs request parameters, scheduling info, an

* `fastdeploy.log` : Main log file, records system configuration, startup information, runtime status, etc.
* `request.log` : Request log file, records user request lifecycle and processing details
* `trace.log` : Trace log file, records events and timestamps for each stage of request processing, used for performance analysis
* `console.log` : Console log, records model startup time and other information. This log is also printed to the console.
* `error.log` : Error log file, records all ERROR and above level logs
* `workerlog.*` : Symlinks to paddle workerlog files in `paddle/` subdirectory. Tracks model loading progress and inference operator errors. Each GPU card has a corresponding file.
Expand Down
1 change: 1 addition & 0 deletions docs/zh/usage/log.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ FastDeploy 将日志分为三个通道:

* `fastdeploy.log` : 主日志文件,记录系统配置、启动信息、运行状态等
* `request.log` : 请求日志文件,记录用户请求的生命周期和处理细节
* `trace.log` : 追踪日志文件,记录请求处理各阶段的事件和时间戳,用于性能分析
* `console.log` : 控制台日志,记录模型启动耗时等信息,该日志信息会被打印到控制台
* `error.log` : 错误日志文件,记录所有 ERROR 及以上级别的日志
* `workerlog.*` : 指向 `paddle/` 子目录中 paddle workerlog 文件的符号链接,记录模型启动加载进度及推理算子报错信息,每个卡对应一个文件
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/cache_manager/cache_messager.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,7 @@ def consume_signals(self):
)
self.cache_prefilled_engine_ids_queue.put(batch_engine_signals)
except Exception as e:
logger.error(f"Consume signals get exception: {e}")
logger.error(f"Consume signals get exception: {e}, {traceback.format_exc()}")

def _handle_connect_task(self):
while True:
Expand Down
8 changes: 4 additions & 4 deletions fastdeploy/cache_manager/cache_transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,7 +1302,7 @@ def do_data_transfer(self):
except (BrokenPipeError, EOFError, ConnectionResetError) as e:
# When a cache_transfer_manager process remains, it keeps printing error logs and may exhaust disk space.
# Add a check to see if the worker process is alive; if it has ended, exit the loop to stop continuous logging.
logger.error(f"[CacheTransferManager] Connection broken: {e}")
logger.error(f"[CacheTransferManager] Connection broken: {e}, {traceback.format_exc()}")
Comment thread
xyxinyang marked this conversation as resolved.
consecutive_error_count += 1
if consecutive_error_count > max_errors:
try:
Expand Down Expand Up @@ -1427,7 +1427,7 @@ def _transfer_data(
f"transfer data: Get unexpected event type {event_type}, only SWAP2CPU and SWAP2GPU supported"
)
except Exception as e:
logger.error(f"transfer data: error: {e}")
logger.error(f"transfer data: error: {e}, {traceback.format_exc()}")
raise e
end_time = time.time()
elasped_time = end_time - start_time
Expand Down Expand Up @@ -1466,7 +1466,7 @@ def check_cache_status(self, args):
self.kv_cache_status_signal.value[0] = KVCacheStatus.CLEARED
self._log_memory("check_cache_status: after clearing caches")
except Exception as e:
logger.error(f"check_cache_status: failed to clear caches: {e}")
logger.error(f"check_cache_status: failed to clear caches: {e}, {traceback.format_exc()}")

elif self.kv_cache_status_signal.value[0] == KVCacheStatus.UPDATING:
assert args.splitwise_role == "mixed", "Only mixed mode supports updating cache."
Expand All @@ -1486,7 +1486,7 @@ def check_cache_status(self, args):
self._log_memory("check_cache_status: after restoring caches")

except Exception as e:
logger.error(f"check_cache_status: failed to restore caches: {e}")
logger.error(f"check_cache_status: failed to restore caches: {e}, {traceback.format_exc()}")

time.sleep(0.1)

Expand Down
3 changes: 2 additions & 1 deletion fastdeploy/cache_manager/multimodal_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import pickle
import threading
import traceback
from abc import ABC, abstractmethod
from collections import OrderedDict
from typing import Any, Tuple
Expand Down Expand Up @@ -157,4 +158,4 @@ def cache_request_handler(self):
logger.info(f"Get processor cache of mm_hashes: {req}")
self.router.send_multipart([client, b"", pickle.dumps(resp)])
except Exception as e:
logger.error(f"Error happened while handling processor cache request: {e}")
logger.error(f"Error happened while handling processor cache request: {e}, {traceback.format_exc()}")
15 changes: 13 additions & 2 deletions fastdeploy/cache_manager/prefix_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
from fastdeploy.engine.request import Request
from fastdeploy.inter_communicator import EngineCacheQueue, IPCSignal, PrefixTreeStatus
from fastdeploy.metrics.metrics import main_process_metrics
from fastdeploy.trace.constants import LoggingEventName
from fastdeploy.trace.trace_logger import print as trace_print
from fastdeploy.utils import get_hash_str, get_logger

logger = get_logger("prefix_cache_manager", "cache_manager.log")
Expand Down Expand Up @@ -624,6 +626,8 @@ def _prepare_cpu_cache(

assert len(need_transfer_task_gpu_block_ids) == len(need_transfer_task_cpu_block_ids)
logger.info(f"request_block_ids: req_id {req_id} issue_swap_task transfer_task_id {transfer_task_id}")
# Record CACHE_SWAP_IN trace event (CPU -> GPU)
trace_print(LoggingEventName.CACHE_SWAP_IN, req_id, None)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ 疑问 CACHE_SWAP_IN 事件在 issue_swap_task 之前记录,记录的是「发起换入请求」的时间点,而非换入完成时间点。

如果 issue_swap_task 失败或换入实际未完成,trace 日志中仍会出现 CACHE_SWAP_IN 事件,可能对性能分析产生误导(例如让人误以为换入成功完成)。请确认这是期望的语义:记录「换入开始」还是「换入完成」?

self.issue_swap_task(
transfer_task_id,
swap_node_ids,
Expand Down Expand Up @@ -915,7 +919,9 @@ def request_match_blocks(self, task: Request, block_size, *args):
f"request_match_blocks: an error occurred while prefix tree status is not normal, ignore it. {e}"
)
else:
logger.error(f"request_match_blocks: request_block_ids: error: {type(e)} {e}")
logger.error(
f"request_match_blocks: request_block_ids: error: {type(e)} {e}, {traceback.format_exc()}"
)
raise e

def request_block_ids(self, task, block_size, dec_token_num, *args):
Expand Down Expand Up @@ -999,6 +1005,11 @@ def request_block_ids(self, task, block_size, dec_token_num, *args):
# 3. update metrics
if matched_block_num > 0:
self.metrics.hit_req_count += 1
# Record CACHE_HIT trace event
trace_print(LoggingEventName.CACHE_HIT, req_id, None)
else:
# Record CACHE_MISS trace event
trace_print(LoggingEventName.CACHE_MISS, req_id, None)
self.metrics.calculate_hit_metrics(
req_id,
cpu_match_token_num,
Expand Down Expand Up @@ -1317,7 +1328,7 @@ def free_nodes_directly(self, node):
f"free_nodes_directly: an error occurred while prefix tree status is not normal, ignore it. {e}"
)
else:
logger.error(f"free_nodes_directly: error: {type(e)} {e}")
logger.error(f"free_nodes_directly: error: {type(e)} {e}, {traceback.format_exc()}")
raise e

def _handle_free_gpu_node_without_cpu(self, node):
Expand Down
5 changes: 4 additions & 1 deletion fastdeploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import json
import os
import traceback
from dataclasses import field
from enum import Enum
from typing import Any, Dict, Literal, Optional, Union
Expand Down Expand Up @@ -2274,7 +2275,9 @@ def postprocess_devices_and_ports(self):
else None
)
except Exception as e:
logger.error(f"Failed to extract local devices or ports. Servers may not be able to start properly. {e}")
logger.error(
f"Failed to extract local devices or ports. Servers may not be able to start properly. {e}, {traceback.format_exc()}"
)

def check(self):
"""
Expand Down
26 changes: 18 additions & 8 deletions fastdeploy/engine/async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ async def start(self):
llm_logger.info("EngineServiceClient started successfully")

except Exception as e:
llm_logger.error(f"Failed to start EngineServiceClient: {e}")
llm_logger.error(f"Failed to start EngineServiceClient: {e}, {traceback.format_exc()}")
raise
return True

Expand Down Expand Up @@ -158,15 +158,15 @@ def signal_handler(signum, frame):
engine._exit_sub_services()
llm_logger.info("Engine process cleanup completed")
except Exception as e:
llm_logger.error(f"Error during engine cleanup: {e}")
llm_logger.error(f"Error during engine cleanup: {e}, {traceback.format_exc()}")

self.engine_process = multiprocessing.Process(target=run_engine)
self.engine_process.start()

llm_logger.info(f"Started engine process with PID: {self.engine_process.pid}")

except Exception as e:
llm_logger.error(f"Failed to start engine process: {e}")
llm_logger.error(f"Failed to start engine process: {e}, {traceback.format_exc()}")
raise

def _wait_engine_ready(self) -> bool:
Expand Down Expand Up @@ -328,7 +328,7 @@ async def init_connections(self):

llm_logger.info("High-performance ZMQ connections initialized successfully")
except Exception as e:
llm_logger.error(f"Failed to initialize ZMQ connections: {e}")
llm_logger.error(f"Failed to initialize ZMQ connections: {e}, {traceback.format_exc()}")
raise

async def get_model_config(self):
Expand Down Expand Up @@ -563,7 +563,12 @@ async def generate(
llm_logger.info(f"Request {conn_request_id} generator exit (outer)")
return
except Exception as e:
log_request_error(message="Request {request_id} failed: {error}", request_id=conn_request_id, error=e)
log_request_error(
message="Request {request_id} failed: {error}, {traceback}",
request_id=conn_request_id,
error=e,
traceback=traceback.format_exc(),
)
raise EngineError(str(e), error_code=500) from e
finally:
# Ensure request_map/request_num are cleaned up
Expand All @@ -585,7 +590,12 @@ async def abort_request(self, request_id: str) -> None:
await self.connection_manager.cleanup_request(request_id)
llm_logger.info(f"Aborted request {request_id}")
except Exception as e:
log_request_error(message="Failed to abort request {request_id}: {error}", request_id=request_id, error=e)
log_request_error(
message="Failed to abort request {request_id}: {error}, {traceback}",
request_id=request_id,
error=e,
traceback=traceback.format_exc(),
)

async def shutdown(self):
"""
Expand All @@ -601,7 +611,7 @@ async def shutdown(self):
try:
await self.connection_manager.close()
except Exception as e:
llm_logger.error(f"Error while stopping connection manager: {e}")
llm_logger.error(f"Error while stopping connection manager: {e}, {traceback.format_exc()}")

# Close ZMQ client
if hasattr(self, "request_client") and self.request_client is not None:
Expand All @@ -615,7 +625,7 @@ async def shutdown(self):
try:
super().shutdown()
except Exception as e:
llm_logger.error(f"Error while stopping engine service process: {e}")
llm_logger.error(f"Error while stopping engine service process: {e}, {traceback.format_exc()}")

llm_logger.info("AsyncLLM shutdown completed")

Expand Down
6 changes: 4 additions & 2 deletions fastdeploy/engine/common_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1917,7 +1917,9 @@ async def wait_one(queue_name: str, queue):
try:
response = task.result()
except Exception as e:
self.llm_logger.error(f"Waiting for control response from {name} failed: {repr(e)}")
self.llm_logger.error(
f"Waiting for control response from {name} failed: {repr(e)}, {traceback.format_exc()}"
)
raise

if response.error_code != 200:
Expand Down Expand Up @@ -2221,7 +2223,7 @@ def clear_data(self):
self.llm_logger.info("Clear Data: Successfully")
return True
except Exception as e:
self.llm_logger.error(f"Clear data error: {e}")
self.llm_logger.error(f"Clear data error: {e}, {traceback.format_exc()}")
return False

def _exit_sub_services(self):
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/engine/register_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def _check_instance_health(self, instance: Dict) -> bool:
response = requests.get(url, timeout=self._timeout)
return response.status_code == 200
except Exception as e:
logger.error(f"_is_decode_health error: {e}, host: {host_ip}, port: {port}")
logger.error(f"_is_decode_health error: {e}, host: {host_ip}, port: {port}, {traceback.format_exc()}")
return False

def _try_rdma_connect(self, instance: Dict) -> bool:
Expand Down Expand Up @@ -382,7 +382,7 @@ def _try_rdma_connect(self, instance: Dict) -> bool:
self.connect_status.pop(task_id, None)

except Exception as e:
logger.error(f"_try_rdma_connect error: {e}")
logger.error(f"_try_rdma_connect error: {e}, {traceback.format_exc()}")
return False

def _check_rdma_connection(self, instance: Dict) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/engine/sched/resource_manager_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,7 +1233,7 @@ def download_bos_features(bos_client, features_urls):
try:
self.bos_client = init_bos_client()
except Exception as e:
error_msg = f"request {request.request_id} init bos client error: {str(e)}"
error_msg = f"request {request.request_id} init bos client error: {str(e)}, {traceback.format_exc()}"
llm_logger.error(error_msg)
request.error_message = error_msg
request.error_code = 540
Expand Down
6 changes: 4 additions & 2 deletions fastdeploy/entrypoints/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ async def generate(request: dict):
except Exception as e:
# 记录完整的异常堆栈信息
log_request_error(
message="request[{request_id}] Error during generation: {error}",
message="request[{request_id}] Error during generation: {error}, {traceback}",
request_id=request.get("request_id"),
error=str(e),
traceback=traceback.format_exc(),
)
# 返回结构化的错误消息并终止流
output = {"error": str(e), "error_type": e.__class__.__name__}
Expand All @@ -94,9 +95,10 @@ async def event_generator():
except Exception as e:
# 记录完整的异常堆栈信息
log_request_error(
message="request[{request_id}] Error during generation: {error}",
message="request[{request_id}] Error during generation: {error}, {traceback}",
request_id=request.get("request_id"),
error=str(e),
traceback=traceback.format_exc(),
)
# 返回结构化的错误消息并终止流
error_msg = {"error": str(e), "error_type": e.__class__.__name__}
Expand Down
3 changes: 2 additions & 1 deletion fastdeploy/entrypoints/chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import os
import time
import traceback
import uuid
from pathlib import Path
from typing import List, Literal, Optional, Union
Expand Down Expand Up @@ -120,7 +121,7 @@ def http_get_with_retry(self, url, max_retries=3, retry_delay=1, backoff_factor=
except Exception as e:
retry_cnt += 1
if retry_cnt >= max_retries:
api_server_logger.error(f"HTTP GET failed: {e}. Max retries reached")
api_server_logger.error(f"HTTP GET failed: {e}. Max retries reached. {traceback.format_exc()}")
raise
api_server_logger.info(f"HTTP GET failed: {e}. Start retry {retry_cnt}")
time.sleep(delay)
Expand Down
7 changes: 4 additions & 3 deletions fastdeploy/entrypoints/openai/run_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import asyncio
import os
import tempfile
import traceback
import uuid
from argparse import Namespace
from collections.abc import Awaitable
Expand Down Expand Up @@ -512,7 +513,7 @@ async def cleanup_resources() -> None:
try:
llm_engine._exit_sub_services()
except Exception as e:
console_logger.error(f"Error stopping engine: {e}")
console_logger.error(f"Error stopping engine: {e}, {traceback.format_exc()}")

# close client connections
if engine_client is not None:
Expand All @@ -522,7 +523,7 @@ async def cleanup_resources() -> None:
if hasattr(engine_client, "connection_manager"):
await engine_client.connection_manager.close()
except Exception as e:
console_logger.error(f"Error closing client connections: {e}")
console_logger.error(f"Error closing client connections: {e}, {traceback.format_exc()}")

# garbage collect
import gc
Expand All @@ -531,7 +532,7 @@ async def cleanup_resources() -> None:
print("run batch done")

except Exception as e:
console_logger.error(f"Error during cleanup: {e}")
console_logger.error(f"Error during cleanup: {e}, {traceback.format_exc()}")


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import json
import re
import traceback
import uuid
from collections.abc import Sequence
from typing import Union
Expand Down Expand Up @@ -230,9 +231,10 @@ def extract_tool_calls(self, model_output: str, request: ChatCompletionRequest)

except Exception as e:
log_request_error(
message="request[{request_id}] Error in extracting tool call from response: {error}",
message="request[{request_id}] Error in extracting tool call from response: {error}, {traceback}",
request_id=request.request_id,
error=str(e),
traceback=traceback.format_exc(),
)
return ExtractedToolCallInformation(tools_called=False, tool_calls=None, content=model_output)

Expand Down Expand Up @@ -351,9 +353,10 @@ def extract_tool_calls_streaming(
return delta
except Exception as e:
log_request_error(
message="request[{request_id}] Error in streaming tool call extraction: {error}",
message="request[{request_id}] Error in streaming tool call extraction: {error}, {traceback}",
request_id=request.get("request_id"),
error=str(e),
traceback=traceback.format_exc(),
)
return None
if "</tool_call>" in self.buffer:
Expand All @@ -366,8 +369,9 @@ def extract_tool_calls_streaming(

except Exception as e:
log_request_error(
message="request[{request_id}] Error in streaming tool call extraction: {error}",
message="request[{request_id}] Error in streaming tool call extraction: {error}, {traceback}",
request_id=request.get("request_id"),
error=str(e),
traceback=traceback.format_exc(),
)
return None
Loading
Loading