Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions xprof/convert/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,7 @@ cc_test(
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
"@org_xprof//plugin/xprof/protobuf:hardware_types_proto_cc",
"@org_xprof//plugin/xprof/protobuf:op_metrics_proto_cc",
"@org_xprof//plugin/xprof/protobuf:op_stats_proto_cc",
"@org_xprof//plugin/xprof/protobuf:power_metrics_proto_cc",
"@org_xprof//plugin/xprof/protobuf:steps_db_proto_cc",
Expand Down
12 changes: 8 additions & 4 deletions xprof/convert/op_metrics_db_combiner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ void CopyOpMetricsMetadata(const OpMetrics& src, OpMetrics* dst) {
if (!dst->has_layout() && src.has_layout()) {
*dst->mutable_layout() = src.layout();
}
if (!dst->has_children() && src.has_children()) {
*dst->mutable_children() = src.children();
}
if (!dst->has_source_info() && src.has_source_info()) {
*dst->mutable_source_info() = src.source_info();
}
Expand Down Expand Up @@ -101,6 +98,10 @@ void CombineOpMetrics(const OpMetrics& src, OpMetrics* dst,
if (src.has_vdd_energy_j() || dst->has_vdd_energy_j()) {
dst->set_vdd_energy_j(src.vdd_energy_j() + dst->vdd_energy_j());
}
if (src.has_children()) {
OpMetricsDbCombiner combiner(dst->mutable_children());
combiner.Combine(src.children(), update_num_cores);
}
}

void CombineMemoryAccessedBreakdown(
Expand Down Expand Up @@ -146,7 +147,10 @@ void OpMetricsDbCombiner::Combine(const OpMetricsDb& src,
dst->set_busy_time_ps(src.busy_time_ps() + dst->busy_time_ps());
dst->set_normalized_total_op_time_ps(src.normalized_total_op_time_ps() +
dst->normalized_total_op_time_ps());
CombinePrecisionStats(src.precision_stats(), dst->mutable_precision_stats());
if (src.has_precision_stats() || dst->has_precision_stats()) {
CombinePrecisionStats(src.precision_stats(),
dst->mutable_precision_stats());
}

for (const auto& src_metrics : src.metrics_db()) {
auto* dst_metrics = LookupOrInsertNewOpMetrics(src_metrics.hlo_module_id(),
Expand Down
80 changes: 80 additions & 0 deletions xprof/convert/op_stats_combiner_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ limitations under the License.
#include "google/protobuf/util/message_differencer.h"
#include "xla/tsl/platform/types.h"
#include "plugin/xprof/protobuf/hardware_types.pb.h"
#include "plugin/xprof/protobuf/op_metrics.pb.h"
#include "plugin/xprof/protobuf/op_stats.pb.h"
#include "plugin/xprof/protobuf/power_metrics.pb.h"
#include "plugin/xprof/protobuf/steps_db.pb.h"
Expand Down Expand Up @@ -234,6 +235,85 @@ TEST(CombineAllOpStatsTest, CombineDisaggregatedServingStats) {
1e-6);
}

// Combines two OpStats whose device op metrics DBs each hold the same
// "parent_op" (hlo_module_id 123) but with differently named children, and
// expects the combined parent to carry summed counters and both children.
TEST(CombineAllOpStatsTest, CombineDeviceOpMetricsDbWithChildren) {
  // Appends one "parent_op" entry with a single child entry to the device op
  // metrics DB of `stats`. Parent and child share hlo_module_id 123.
  const auto add_parent_with_child =
      [](OpStats* stats, uint32_t parent_occurrences, uint64_t parent_time_ps,
         const char* child_name, uint32_t child_occurrences,
         uint64_t child_time_ps) {
        OpMetrics* parent =
            stats->mutable_device_op_metrics_db()->add_metrics_db();
        parent->set_name("parent_op");
        parent->set_hlo_module_id(123);
        parent->set_occurrences(parent_occurrences);
        parent->set_time_ps(parent_time_ps);

        OpMetrics* child = parent->mutable_children()->add_metrics_db();
        child->set_name(child_name);
        child->set_hlo_module_id(123);
        child->set_occurrences(child_occurrences);
        child->set_time_ps(child_time_ps);
      };

  OpStats dst_op_stats, op_stats_1, op_stats_2;

  // First input: parent_op with child_op_1.
  add_parent_with_child(&op_stats_1, /*parent_occurrences=*/1,
                        /*parent_time_ps=*/100, "child_op_1",
                        /*child_occurrences=*/2, /*child_time_ps=*/50);
  // Second input: the same parent_op, but with a different child.
  add_parent_with_child(&op_stats_2, /*parent_occurrences=*/2,
                        /*parent_time_ps=*/200, "child_op_2",
                        /*child_occurrences=*/3, /*child_time_ps=*/80);

  std::vector<OpStatsInfo> all_op_stats_info = {
      OpStatsInfo(&op_stats_1, TPU, 0), OpStatsInfo(&op_stats_2, TPU, 1)};

  // Placeholder step data; this test only inspects the op metrics DB.
  StepDatabaseResult dummy_step_db_result;
  absl::flat_hash_map<uint32_t /*host_id*/, const StepDatabaseResult*> result;
  result.emplace(0, &dummy_step_db_result);
  StepIntersection dummy_step_intersection(1, result);

  CombineAllOpStats(all_op_stats_info, dummy_step_intersection, &dst_op_stats);

  // The two parent entries must collapse into one, with counters summed.
  const auto& merged_db = dst_op_stats.device_op_metrics_db();
  ASSERT_EQ(merged_db.metrics_db_size(), 1);
  const auto& merged_parent = merged_db.metrics_db(0);
  EXPECT_EQ(merged_parent.name(), "parent_op");
  EXPECT_EQ(merged_parent.occurrences(), 3);
  EXPECT_EQ(merged_parent.time_ps(), 300);

  // Both children must be present under the combined parent.
  const auto& merged_children = merged_parent.children();
  ASSERT_EQ(merged_children.metrics_db_size(), 2);

  // Index the children by name; their order in the repeated field might vary.
  absl::flat_hash_map<std::string, const OpMetrics*> children_by_name;
  for (const auto& child : merged_children.metrics_db()) {
    children_by_name[child.name()] = &child;
  }

  ASSERT_TRUE(children_by_name.contains("child_op_1"));
  const OpMetrics* first_child = children_by_name.at("child_op_1");
  EXPECT_EQ(first_child->occurrences(), 2);
  EXPECT_EQ(first_child->time_ps(), 50);

  ASSERT_TRUE(children_by_name.contains("child_op_2"));
  const OpMetrics* second_child = children_by_name.at("child_op_2");
  EXPECT_EQ(second_child->occurrences(), 3);
  EXPECT_EQ(second_child->time_ps(), 80);
}

} // namespace
} // namespace profiler
} // namespace tensorflow
2 changes: 0 additions & 2 deletions xprof/convert/xplane_to_op_metrics_db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,6 @@ TEST(ConvertXPlaneToOpMetricsDb, HostXPlaneWithXlaOps) {
}
total_time_ps: 20000000
total_op_time_ps: 18000000
precision_stats {}
)pb"));
#endif
}
Expand Down Expand Up @@ -643,7 +642,6 @@ TEST(ConvertXPlaneToOpMetricsDb, HostXPlaneWithInputPipelineTracemeOps) {
}
total_time_ps: 30000000
total_op_time_ps: 20000000
precision_stats {}
)pb")));
#endif
}
Expand Down