Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
63901ef
docs: Fix typo 'occured' to 'occurred' in metrics
codeXsidd Mar 21, 2026
5c28519
Trigger CLA check
codeXsidd Mar 21, 2026
1584cfd
Stabilize SymbolicExpr and SymbolicMap string representation
tensorflower-gardener Apr 2, 2026
8be2ee2
[XLA:GPU] Use XLA_VLOG_DEVICE where device id is useful in logs.
PatriosTheGreat Apr 2, 2026
e22249f
[XLA:GPU] Do not run large HLO fusions concurrently in command buffer…
thomasjoerg Apr 2, 2026
f0dd107
Add TargetMachineOptions::Native() to create options for the host mac…
beckerhe Apr 2, 2026
0e5326d
Automated Code Change
derekmauro Apr 2, 2026
657dd2f
Bump rules_python 1.8.4 -> 1.8.5 to restore correct URL-based req pro…
belitskiy Apr 2, 2026
33264a6
[mpmd] Use reserved_hbm_bytes attribute to reserve per-fragment memory
ICGog Apr 2, 2026
7d64214
Make XLA:GPU propagate all CPU HW features to FFI and XLA:CPU
beckerhe Apr 2, 2026
d6c9fa0
Merge pull request #112909 from codeXsidd:fix-typo-metrics
tensorflower-gardener Apr 2, 2026
84e6b72
PR #40311: [xla:gpu] Delete vestigial WaitForStreamsThunk
ezhulenev Apr 2, 2026
2028a1b
Consolidate sanitizer build checks for tfrt_session_python_test.
tensorflower-gardener Apr 2, 2026
46b5a5c
Reverts cd712c81dacec1d737af7e27dc01c8fc0a7f5020
beckerhe Apr 2, 2026
7d08968
HLO builder tuple_test is hardware-independent.
nvgrw Apr 2, 2026
45e89d3
Original change is causing presubmit tests failure. Rolling back for …
tensorflower-gardener Apr 2, 2026
2048f98
Rollback of PR #39854
yuriivcs Apr 2, 2026
4603402
Make `HloProgram::name()` return a unique and stable name even if the…
junwhanahn Apr 2, 2026
147e4d9
Remove unneeded condition variable from coordination service.
mwhittaker Apr 2, 2026
c3f2bc1
Reverts e6f36bd7a12f87aac6f21a5068719dfc91bed3eb
Varcho Apr 2, 2026
cfc9b69
Populate device_memory_bytes_limit attribute while reconstructing Top…
krishnaharidasan Apr 2, 2026
8ba9f93
PR #40117: Update XLA oneAPI presubmit build targets
mraunak Apr 2, 2026
6b2cc0c
Drop deduplicating any calls on outliner.
ekayaaslan Apr 2, 2026
ca6995d
Centralize PjRt Transpose Caching in CommonPjRtClient.
junwhanahn Apr 2, 2026
d85c233
Implement non-descending layout support in `CpuRawBuffer::CopyToLiter…
junwhanahn Apr 2, 2026
ce5d9d1
[XLA:CPU] Add Pi approximation to fusion microbenchmark.
penpornk Apr 2, 2026
b5979e4
[IFRT IR] Add compile option to set strict memory reservations
ICGog Apr 2, 2026
3794104
Run `MakeArraysFromHostBuffer` and `CopyToHostBuffer` tests with stri…
junwhanahn Apr 2, 2026
6875c44
[XLA:CPU] Add Opt Preset FAST_COMPILE for interactive and test users.
seantalts Apr 2, 2026
fe9e7a0
Use salted hashing for key_hash so that we have fewer collisions in b…
ezbr Apr 2, 2026
f5df0e8
[XLA] Wrap loops containing only DCHECKs with ifndef NDEBUG.
zvikinoza Apr 2, 2026
f983c0f
[tsl] Clean up #includes in errors.h and status.h
d0k Apr 2, 2026
7a27b8a
Migrate multithreaded_compilation_test to PjRt runtime.
nvgrw Apr 2, 2026
84f6f47
[XLA:CPU] Rename is_fusion_emitters to use_fusion_emitters for consis…
seantalts Apr 2, 2026
d69b6eb
Remove unused reset logic from coordination agent.
mwhittaker Apr 2, 2026
b20eda2
[IFRT IR] Do not apply identity IfrtArraType conversion in lowering t…
ICGog Apr 2, 2026
9e6df82
[IFRT] Add `Value::ByteSize` method
hyeontaek Apr 2, 2026
13e3434
Erase async collectives in XLA CPU.
mwhittaker Apr 2, 2026
6ec44a0
Integrate LLVM at llvm/llvm-project@7ccd92e5e6e5
alinas Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 49 additions & 24 deletions tensorflow/core/framework/local_rendezvous.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,25 @@ limitations under the License.

#include "tensorflow/core/framework/local_rendezvous.h"

#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/hash/hash.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "xla/tsl/platform/logging.h"
#include "tensorflow/core/activity_watcher/activity.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/rendezvous.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/notification.h"
#include "tensorflow/core/lib/gtl/manual_constructor.h"
#include "tensorflow/core/lib/monitoring/counter.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/refcount.h"
#include "tensorflow/core/platform/types.h"
#include "tsl/platform/refcount.h"

namespace tensorflow {
Expand Down Expand Up @@ -142,18 +141,43 @@ LocalRendezvous::~LocalRendezvous() {
}

namespace {
uint64_t KeyHash(absl::string_view k) {
// We use absl::HashOf instead of tsl::Hash64 because it's faster, and we
// don't need a deterministic hash function.
return absl::HashOf(k);
}
class KeyHash {
public:
// We use salted hashing (see go/totw/189) to reduce the likelihood of hash
// collisions. Note: if the strings are long, then it would be better to
// generate both hashes while iterating once over the string, but in practice,
// it's hard to beat absl::Hash, which is highly optimized.
explicit KeyHash(absl::string_view key) {
// We use absl::HashOf instead of tsl::Hash64 because it's faster, and we
// don't need a deterministic hash function.
bucket_hash_ = absl::HashOf(key);
constexpr int kArbitraryConstant = 100;
// Note: it's important that the arbitrary constant is passed to HashOf
// before `key` so that the different initial hash state cascades while
// hashing the string contents.
table_hash_ = absl::HashOf(kArbitraryConstant, key);
}
uint64_t bucket(uint64_t num_buckets) const {
return bucket_hash_ % num_buckets;
}
uint64_t table_hash() const { return table_hash_; }
std::string ToString() const {
return absl::StrFormat("bucket_hash: %#x, table_hash: %#x", bucket_hash_,
table_hash_);
}

private:
uint64_t bucket_hash_;
uint64_t table_hash_;
};
} // namespace

absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
const Rendezvous::Args& send_args,
const Tensor& val, const bool is_dead) {
uint64_t key_hash = KeyHash(key.FullKey());
DVLOG(2) << "Send " << this << " " << key_hash << " " << key.FullKey();
KeyHash key_hash = KeyHash(key.FullKey());
DVLOG(2) << "Send " << this << " " << key_hash.ToString() << " "
<< key.FullKey();

if (is_dead) {
static auto* rendezvous_dead_values_sent = monitoring::Counter<2>::New(
Expand All @@ -165,7 +189,7 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
->IncrementBy(1);
}

int bucket_index = key_hash % num_buckets_;
int bucket_index = key_hash.bucket(num_buckets_);
auto& bucket = table_buckets_[bucket_index];
bucket.mu.lock();

Expand All @@ -174,7 +198,7 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
return s;
}

auto it = bucket.table.insert({key_hash, ItemQueue()}).first;
auto it = bucket.table.insert({key_hash.table_hash(), ItemQueue()}).first;
ItemQueue* queue = &it->second;
if (queue->head == nullptr || queue->head->type == Item::kSend) {
// There is no waiter for this message. Append the message
Expand All @@ -192,7 +216,7 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
activity_watcher::Activity::Attributes{
{"Rendezvous", absl::StrFormat("%p", this)},
{"key", std::string(key.FullKey())},
{"key_hash", absl::StrCat(key_hash)},
{"key_hash", key_hash.ToString()},
});
},
/*level=*/1);
Expand Down Expand Up @@ -235,11 +259,12 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
const Rendezvous::Args& recv_args,
Rendezvous::DoneCallback done) {
uint64_t key_hash = KeyHash(key.FullKey());
DVLOG(2) << "Recv " << this << " " << key_hash << " " << key.FullKey();
KeyHash key_hash = KeyHash(key.FullKey());
DVLOG(2) << "Recv " << this << " " << key_hash.ToString() << " "
<< key.FullKey();
tsl::core::RefCountPtr<Rendezvous> rc_keep_alive;

int bucket_index = key_hash % num_buckets_;
int bucket_index = key_hash.bucket(num_buckets_);
auto& bucket = table_buckets_[bucket_index];
bucket.mu.lock();

Expand All @@ -250,7 +275,7 @@ void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
return;
}

auto it = bucket.table.insert({key_hash, ItemQueue()}).first;
auto it = bucket.table.insert({key_hash.table_hash(), ItemQueue()}).first;
ItemQueue* queue = &it->second;
if (queue->head == nullptr || queue->head->type == Item::kRecv) {
// There is no message to pick up.
Expand All @@ -274,7 +299,7 @@ void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
{
mutex_lock l(bucket.mu);

auto it = bucket.table.find(key_hash);
auto it = bucket.table.find(key_hash.table_hash());
if (it != bucket.table.end()) {
ItemQueue* queue = &it->second;
// Find an item in the queue with a cancellation token that matches
Expand Down Expand Up @@ -343,7 +368,7 @@ void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
activity_watcher::Activity::Attributes{
{"Rendezvous", absl::StrFormat("%p", this)},
{"key", std::string(key.FullKey())},
{"key_hash", absl::StrCat(key_hash)},
{"key_hash", key_hash.ToString()},
});
},
/*level=*/1);
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/core/framework/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ auto* tf_data_autotune_stopping_criteria_counter =

auto* tf_data_debug = tsl::monitoring::Counter<1>::New(
"/tensorflow/data/debug",
"The number of times this event occured, for debugging.", "event");
"The number of times this event occurred, for debugging.", "event");

auto* tf_data_error = tsl::monitoring::Counter<2>::New(
"/tensorflow/data/error",
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/core/framework/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ void RecordTFDataAutoShardRewriteBatchSize(
// criterion is met.
void RecordTFDataAutotuneStoppingCriteria(const std::string& name);

// Records the number of times this event occured, for debugging.
// Records the number of times this event occurred, for debugging.
void RecordTFDataDebug(const std::string& event);

// Records the number of times an error of this type occurred with this status
Expand Down
1 change: 0 additions & 1 deletion tensorflow/core/tfrt/run_handler_thread_pool/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ tf_cc_test(
"//tensorflow/core/kernels:matmul_op",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/synchronization",
"@com_google_absl//absl/time",
"@com_google_googletest//:gtest",
"@eigen_archive//:eigen3",
"@tf_runtime//:hostcontext",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1068,7 +1068,7 @@ void RunHandler::ScheduleInterOpClosure(TaskFunction fn) {
}

void RunHandler::ScheduleIntraOpClosure(TaskFunction fn) {
impl_->ScheduleIntraOpClosure(std::move(fn));
impl_->ScheduleInterOpClosure(std::move(fn));
}

int RunHandler::NumThreads() const {
Expand Down
34 changes: 0 additions & 34 deletions tensorflow/core/tfrt/run_handler_thread_pool/run_handler_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ limitations under the License.

#include "absl/synchronization/barrier.h"
#include "absl/synchronization/notification.h"
#include "absl/time/time.h"
#include "unsupported/Eigen/CXX11/Tensor" // from @eigen_archive
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/blocking_counter.h"
Expand Down Expand Up @@ -212,39 +211,6 @@ TEST(RunHandlerUtilTest, IntraOpThreadPool) {
notification.WaitForNotification();
}

// Verifies that ScheduleIntraOpClosure enqueues work to the non-blocking
// (intra-op) queue.
TEST(RunHandlerUtilTest, ScheduleIntraOpClosureRoutesToNonBlockingQueue) {
RunHandlerPool::Options options;
options.num_inter_op_threads = 1; // 1 blocking thread
options.num_intra_op_threads = 1; // 1 non-blocking thread
options.num_threads_in_sub_thread_pool = {2};
std::unique_ptr<RunHandlerPool> pool(new RunHandlerPool(options));

auto handler = pool->Get(/*step_id=*/1, /*timeout_in_ms=*/0);

// Block the sole inter-op (blocking) thread.
absl::Notification blocker_started;
absl::Notification blocker_release;
handler->ScheduleInterOpClosure(TaskFunction([&]() {
blocker_started.Notify();
blocker_release.WaitForNotification();
}));
blocker_started.WaitForNotification();

// Schedule an intra-op closure. With the correct implementation this goes
// to the non-blocking queue and the intra-op thread picks it up.
absl::Notification intra_done;
handler->ScheduleIntraOpClosure(TaskFunction([&]() { intra_done.Notify(); }));

// If ScheduleIntraOpClosure incorrectly enqueued as blocking work, the
// intra-op thread cannot pick it up and this would hang.
EXPECT_TRUE(intra_done.WaitForNotificationWithTimeout(absl::Seconds(10)));

// Unblock the inter-op thread so the pool can shut down cleanly.
blocker_release.Notify();
}

class RunHandlerThreadPoolTest
: public testing::TestWithParam<std::tuple<bool, bool>> {
protected:
Expand Down
4 changes: 1 addition & 3 deletions tensorflow/core/tfrt/tfrt_session/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,7 @@ tf_cc_shared_test(
# name = "tfrt_session_python_test",
# srcs = ["tfrt_session_python_test.py"],
# exec_properties = select({
# "//tools/cpp:asan_build": {"cpp_link.mem": "20g"},
# "//tools/cpp:msan_build": {"cpp_link.mem": "20g"},
# "//tools/cpp:tsan_build": {"cpp_link.mem": "20g"},
# "//tools/cpp:sanitizer_build": {"cpp_link.mem": "20g"},
# "//conditions:default": None,
# }),
# deps = [
Expand Down
6 changes: 3 additions & 3 deletions third_party/py/python_init_rules.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def python_init_rules(extra_patches = []):

tf_http_archive(
name = "rules_python",
sha256 = "c85d5db38d3eac06167a13b10c9dba54b003a986cd4f1ebc00806b74e7c12f06",
strip_prefix = "rules_python-1.8.4",
urls = tf_mirror_urls("https://github.com/bazelbuild/rules_python/releases/download/1.8.4/rules_python-1.8.4.tar.gz"),
sha256 = "8964aa1e7525fea5244ba737458694a057ada1be96a92998a41caa1983562d00",
strip_prefix = "rules_python-1.8.5",
urls = tf_mirror_urls("https://github.com/bazelbuild/rules_python/releases/download/1.8.5/rules_python-1.8.5.tar.gz"),
patch_file = [
"@xla//third_party/py:rules_python_scope.patch",
"@xla//third_party/py:rules_python_freethreaded.patch",
Expand Down
3 changes: 1 addition & 2 deletions third_party/xla/.github/workflows/rocm_jax_ut.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,4 @@ jobs:
--override_repository=xla=${GITHUB_WORKSPACE} \
--config=single_gpu \
--local_test_jobs=4 \
--repo_env=TF_ROCM_RBE_DOCKER_IMAGE=${DOCKER_IMAGE} \
--crosstool_top=@local_config_rocm//crosstool:toolchain-local
--repo_env=TF_ROCM_RBE_DOCKER_IMAGE=${DOCKER_IMAGE}
7 changes: 2 additions & 5 deletions third_party/xla/MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ bazel_dep(name = "re2", version = "2025-11-05", repo_name = "com_googlesource_co
bazel_dep(name = "rules_cc", version = "0.2.0")
bazel_dep(name = "rules_java", version = "8.16.1")
bazel_dep(name = "rules_license", version = "1.0.0")
bazel_dep(name = "rules_python", version = "1.8.4")
bazel_dep(name = "rules_python", version = "1.8.5")
bazel_dep(name = "rules_shell", version = "0.6.1")
bazel_dep(name = "snappy", version = "1.2.1")
bazel_dep(name = "xxd", version = "9.1.0917")
Expand Down Expand Up @@ -137,7 +137,7 @@ single_version_override(
"//third_party/py:rules_python_versions.patch",
"//third_party/py:rules_python_scope.patch",
],
version = "1.8.4",
version = "1.8.5",
)

python = use_extension("@rules_python//python/extensions:python.bzl", "python")
Expand Down Expand Up @@ -230,9 +230,6 @@ register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64")
register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64_cuda")

### Other local config repos
local_clang_configure = use_extension("@rules_ml_toolchain//cc/llvms/local:local_clang_configure.bzl", "local_clang_configure_ext")
use_repo(local_clang_configure, "local_config_clang")

rocm_configure = use_extension("//third_party/extensions:rocm_configure.bzl", "rocm_configure_ext")
use_repo(rocm_configure, "local_config_rocm")

Expand Down
6 changes: 1 addition & 5 deletions third_party/xla/build_tools/ci/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,6 @@
"//build_tools/...",
"@tsl//tsl/...",
)
_XLA_ONEAPI_TARGET_PATTERNS = (
"//xla/stream_executor/sycl/...",
"//xla/service/gpu/...",
)
_XLA_CPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS = (
"//xla/tools/multihost_hlo_runner:hlo_runner_main",
"//xla/tools:compute_xspace_stats_main",
Expand Down Expand Up @@ -512,7 +508,7 @@ def nvidia_gpu_build_with_compute_capability(
"sycl_hermetic",
"icpx_clang",
),
target_patterns=_XLA_ONEAPI_TARGET_PATTERNS,
target_patterns=_XLA_DEFAULT_TARGET_PATTERNS,
build_tag_filters=oneapi_build_tag_filter,
test_tag_filters=oneapi_test_tag_filter,
options={**_DEFAULT_BAZEL_OPTIONS, "//xla/tsl:ci_build": True},
Expand Down
4 changes: 2 additions & 2 deletions third_party/xla/build_tools/ci/golden_commands.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,-rocm-only,-oneapi-on
bazel analyze-profile profile.json.gz
# END BuildType.XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS
# BEGIN BuildType.XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build --nobuild -- //xla/stream_executor/sycl/... //xla/service/gpu/...
bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build -- //xla/stream_executor/sycl/... //xla/service/gpu/...
parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build -- //xla/... //build_tools/... @tsl//tsl/...
bazel analyze-profile profile.json.gz
# END BuildType.XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS
# BEGIN BuildType.XLA_MACOS_ARM64_CPU_KOKORO
Expand Down
21 changes: 10 additions & 11 deletions third_party/xla/tensorflow.bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -273,26 +273,25 @@ common:asan --copt -g
common:asan --copt -fno-omit-frame-pointer
common:asan --linkopt -fsanitize=address

common:rocm_base --config=clang_local
common:rocm_base --copt=-Wno-gnu-offsetof-extensions
common:rocm_base --crosstool_top=@local_config_rocm//crosstool:toolchain
common:rocm_base --define=using_rocm_hipcc=true
common:rocm_base --define=tensorflow_mkldnn_contraction_kernel=0
common:rocm_base --repo_env TF_NEED_ROCM=1
common:rocm_base --action_env=HIPCC_COMPILE_FLAGS_APPEND="--offload-compress"

# ROCm with hermetic clang toolchain
common:rocm_clang_hermetic --config=rocm_base
common:rocm_clang_hermetic --extra_toolchains=@local_config_rocm//crosstool:toolchain-linux-x86_64-hermetic
common:rocm_clang_official --config=rocm_base
common:rocm_clang_official --action_env=CLANG_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
common:rocm_clang_official --action_env=HIPCC_COMPILE_FLAGS_APPEND="--offload-compress"
common:rocm_clang_official --action_env=TF_ROCM_CLANG="1"
common:rocm_clang_official --linkopt="-fuse-ld=lld"
common:rocm_clang_official --host_linkopt="-fuse-ld=lld"

# ROCm with local/system clang toolchain
# CLANG_COMPILER_PATH sets the compiler for the wrapper script
common:rocm_clang_local --config=rocm_base
common:rocm_clang_local --extra_toolchains=@local_config_rocm//crosstool:toolchain-linux-x86_64-local

common:rocm --config=rocm_clang_hermetic
common:rocm --config=rocm_clang_official
common:rocm_ci --config=rocm

common:rocm_ci_hermetic --dynamic_mode=off
common:rocm_ci_hermetic --config=rocm_clang_hermetic
common:rocm_ci_hermetic --config=rocm_clang_official
common:rocm_ci_hermetic --repo_env="ROCM_DISTRO_VERSION=rocm_7.10.0_gfx90X"
common:rocm_ci_hermetic --@local_config_rocm//rocm:rocm_path_type=hermetic

Expand Down

This file was deleted.

Loading