GesuBackups · pull · Apr 3, 2026 · Mar 21, 2026 · Mar 21, 2026 · Apr 2, 2026
diff --git a/tensorflow/core/framework/local_rendezvous.cc b/tensorflow/core/framework/local_rendezvous.cc
@@ -15,26 +15,25 @@ limitations under the License.
 
 #include "tensorflow/core/framework/local_rendezvous.h"
 
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <utility>
+#include <vector>
 
+#include "absl/hash/hash.h"
 #include "absl/status/status.h"
+#include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
 #include "xla/tsl/platform/logging.h"
 #include "tensorflow/core/activity_watcher/activity.h"
-#include "tensorflow/core/framework/allocator.h"
-#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/rendezvous.h"
+#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/gtl/manual_constructor.h"
 #include "tensorflow/core/lib/monitoring/counter.h"
-#include "tensorflow/core/lib/strings/numbers.h"
-#include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/refcount.h"
-#include "tensorflow/core/platform/types.h"
 #include "tsl/platform/refcount.h"
 
 namespace tensorflow {
@@ -142,18 +141,43 @@ LocalRendezvous::~LocalRendezvous() {
 }
 
 namespace {
-uint64_t KeyHash(absl::string_view k) {
-  // We use absl::HashOf instead of tsl::Hash64 because it's faster, and we
-  // don't need a deterministic hash function.
-  return absl::HashOf(k);
-}
+// Salt for the secondary hash; the exact value is arbitrary.
+constexpr int kTableHashSalt = 100;
+
+// Holds two hashes of one key. Salted hashing (see go/totw/189) reduces the
+// likelihood of hash collisions. For long strings it would be better to derive
+// both hashes in a single pass, but in practice it's hard to beat absl::Hash.
+class KeyHash {
+ public:
+  // absl::HashOf is used instead of tsl::Hash64 because it's faster and no
+  // deterministic hash is needed. The salt must precede `key` so that the
+  // different initial hash state cascades through the string contents.
+  explicit KeyHash(absl::string_view key)
+      : bucket_hash_(absl::HashOf(key)),
+        table_hash_(absl::HashOf(kTableHashSalt, key)) {}
+  // Reduces the unsalted hash modulo `num_buckets`.
+  uint64_t bucket(uint64_t num_buckets) const {
+    return bucket_hash_ % num_buckets;
+  }
+  uint64_t table_hash() const { return table_hash_; }
+  // Renders both hashes in 0x-prefixed hex for diagnostics.
+  std::string ToString() const {
+    return absl::StrFormat("bucket_hash: %#x, table_hash: %#x", bucket_hash_,
+                           table_hash_);
+  }
+
+ private:
+  uint64_t bucket_hash_;
+  uint64_t table_hash_;
+};
 }  // namespace
 
 absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
                                    const Rendezvous::Args& send_args,
                                    const Tensor& val, const bool is_dead) {
-  uint64_t key_hash = KeyHash(key.FullKey());
-  DVLOG(2) << "Send " << this << " " << key_hash << " " << key.FullKey();
+  KeyHash key_hash = KeyHash(key.FullKey());
+  DVLOG(2) << "Send " << this << " " << key_hash.ToString() << " "
+           << key.FullKey();
 
   if (is_dead) {
     static auto* rendezvous_dead_values_sent = monitoring::Counter<2>::New(
@@ -165,7 +189,7 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
         ->IncrementBy(1);
   }
 
-  int bucket_index = key_hash % num_buckets_;
+  int bucket_index = key_hash.bucket(num_buckets_);
   auto& bucket = table_buckets_[bucket_index];
   bucket.mu.lock();
 
@@ -174,7 +198,7 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
     return s;
   }
 
-  auto it = bucket.table.insert({key_hash, ItemQueue()}).first;
+  auto it = bucket.table.insert({key_hash.table_hash(), ItemQueue()}).first;
   ItemQueue* queue = &it->second;
   if (queue->head == nullptr || queue->head->type == Item::kSend) {
     // There is no waiter for this message. Append the message
@@ -192,7 +216,7 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
               activity_watcher::Activity::Attributes{
                   {"Rendezvous", absl::StrFormat("%p", this)},
                   {"key", std::string(key.FullKey())},
-                  {"key_hash", absl::StrCat(key_hash)},
+                  {"key_hash", key_hash.ToString()},
               });
         },
         /*level=*/1);
@@ -235,11 +259,12 @@ absl::Status LocalRendezvous::Send(const Rendezvous::ParsedKey& key,
 void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
                                 const Rendezvous::Args& recv_args,
                                 Rendezvous::DoneCallback done) {
-  uint64_t key_hash = KeyHash(key.FullKey());
-  DVLOG(2) << "Recv " << this << " " << key_hash << " " << key.FullKey();
+  KeyHash key_hash = KeyHash(key.FullKey());
+  DVLOG(2) << "Recv " << this << " " << key_hash.ToString() << " "
+           << key.FullKey();
   tsl::core::RefCountPtr<Rendezvous> rc_keep_alive;
 
-  int bucket_index = key_hash % num_buckets_;
+  int bucket_index = key_hash.bucket(num_buckets_);
   auto& bucket = table_buckets_[bucket_index];
   bucket.mu.lock();
 
@@ -250,7 +275,7 @@ void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
     return;
   }
 
-  auto it = bucket.table.insert({key_hash, ItemQueue()}).first;
+  auto it = bucket.table.insert({key_hash.table_hash(), ItemQueue()}).first;
   ItemQueue* queue = &it->second;
   if (queue->head == nullptr || queue->head->type == Item::kRecv) {
     // There is no message to pick up.
@@ -274,7 +299,7 @@ void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
         {
           mutex_lock l(bucket.mu);
 
-          auto it = bucket.table.find(key_hash);
+          auto it = bucket.table.find(key_hash.table_hash());
           if (it != bucket.table.end()) {
             ItemQueue* queue = &it->second;
             // Find an item in the queue with a cancellation token that matches
@@ -343,7 +368,7 @@ void LocalRendezvous::RecvAsync(const Rendezvous::ParsedKey& key,
               activity_watcher::Activity::Attributes{
                   {"Rendezvous", absl::StrFormat("%p", this)},
                   {"key", std::string(key.FullKey())},
-                  {"key_hash", absl::StrCat(key_hash)},
+                  {"key_hash", key_hash.ToString()},
               });
         },
         /*level=*/1);

diff --git a/tensorflow/core/framework/metrics.cc b/tensorflow/core/framework/metrics.cc
@@ -332,7 +332,7 @@ auto* tf_data_autotune_stopping_criteria_counter =
 
 auto* tf_data_debug = tsl::monitoring::Counter<1>::New(
     "/tensorflow/data/debug",
-    "The number of times this event occured, for debugging.", "event");
+    "The number of times this event occurred, for debugging.", "event");
 
 auto* tf_data_error = tsl::monitoring::Counter<2>::New(
     "/tensorflow/data/error",

diff --git a/tensorflow/core/framework/metrics.h b/tensorflow/core/framework/metrics.h
@@ -235,7 +235,7 @@ void RecordTFDataAutoShardRewriteBatchSize(
 // criterion is met.
 void RecordTFDataAutotuneStoppingCriteria(const std::string& name);
 
-// Records the number of times this event occured, for debugging.
+// Records the number of times this event occurred, for debugging.
 void RecordTFDataDebug(const std::string& event);
 
 // Records the number of times an error of this type occurred with this status

diff --git a/tensorflow/core/tfrt/run_handler_thread_pool/BUILD b/tensorflow/core/tfrt/run_handler_thread_pool/BUILD
@@ -84,7 +84,6 @@ tf_cc_test(
         "//tensorflow/core/kernels:matmul_op",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/synchronization",
-        "@com_google_absl//absl/time",
         "@com_google_googletest//:gtest",
         "@eigen_archive//:eigen3",
         "@tf_runtime//:hostcontext",

diff --git a/tensorflow/core/tfrt/run_handler_thread_pool/run_handler.cc b/tensorflow/core/tfrt/run_handler_thread_pool/run_handler.cc
@@ -1068,7 +1068,7 @@ void RunHandler::ScheduleInterOpClosure(TaskFunction fn) {
 }
 
 void RunHandler::ScheduleIntraOpClosure(TaskFunction fn) {
-  impl_->ScheduleIntraOpClosure(std::move(fn));
+  impl_->ScheduleIntraOpClosure(std::move(fn));  // Intra-op, not inter-op.
 }
 
 int RunHandler::NumThreads() const {

diff --git a/tensorflow/core/tfrt/run_handler_thread_pool/run_handler_test.cc b/tensorflow/core/tfrt/run_handler_thread_pool/run_handler_test.cc
@@ -31,7 +31,6 @@ limitations under the License.
 
 #include "absl/synchronization/barrier.h"
 #include "absl/synchronization/notification.h"
-#include "absl/time/time.h"
 #include "unsupported/Eigen/CXX11/Tensor"  // from @eigen_archive
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/blocking_counter.h"
@@ -212,39 +211,6 @@ TEST(RunHandlerUtilTest, IntraOpThreadPool) {
   notification.WaitForNotification();
 }
 
-// Verifies that ScheduleIntraOpClosure enqueues work to the non-blocking
-// (intra-op) queue.
-TEST(RunHandlerUtilTest, ScheduleIntraOpClosureRoutesToNonBlockingQueue) {
-  RunHandlerPool::Options options;
-  options.num_inter_op_threads = 1;  // 1 blocking thread
-  options.num_intra_op_threads = 1;  // 1 non-blocking thread
-  options.num_threads_in_sub_thread_pool = {2};
-  std::unique_ptr<RunHandlerPool> pool(new RunHandlerPool(options));
-
-  auto handler = pool->Get(/*step_id=*/1, /*timeout_in_ms=*/0);
-
-  // Block the sole inter-op (blocking) thread.
-  absl::Notification blocker_started;
-  absl::Notification blocker_release;
-  handler->ScheduleInterOpClosure(TaskFunction([&]() {
-    blocker_started.Notify();
-    blocker_release.WaitForNotification();
-  }));
-  blocker_started.WaitForNotification();
-
-  // Schedule an intra-op closure. With the correct implementation this goes
-  // to the non-blocking queue and the intra-op thread picks it up.
-  absl::Notification intra_done;
-  handler->ScheduleIntraOpClosure(TaskFunction([&]() { intra_done.Notify(); }));
-
-  // If ScheduleIntraOpClosure incorrectly enqueued as blocking work, the
-  // intra-op thread cannot pick it up and this would hang.
-  EXPECT_TRUE(intra_done.WaitForNotificationWithTimeout(absl::Seconds(10)));
-
-  // Unblock the inter-op thread so the pool can shut down cleanly.
-  blocker_release.Notify();
-}
-
 class RunHandlerThreadPoolTest
     : public testing::TestWithParam<std::tuple<bool, bool>> {
  protected:

diff --git a/tensorflow/core/tfrt/tfrt_session/BUILD b/tensorflow/core/tfrt/tfrt_session/BUILD
@@ -130,9 +130,7 @@ tf_cc_shared_test(
 #     name = "tfrt_session_python_test",
 #     srcs = ["tfrt_session_python_test.py"],
 #     exec_properties = select({
-#         "//tools/cpp:asan_build": {"cpp_link.mem": "20g"},
-#         "//tools/cpp:msan_build": {"cpp_link.mem": "20g"},
-#         "//tools/cpp:tsan_build": {"cpp_link.mem": "20g"},
+#         "//tools/cpp:sanitizer_build": {"cpp_link.mem": "20g"},
 #         "//conditions:default": None,
 #     }),
 #     deps = [

diff --git a/third_party/py/python_init_rules.bzl b/third_party/py/python_init_rules.bzl
@@ -37,9 +37,9 @@ def python_init_rules(extra_patches = []):
 
     tf_http_archive(
         name = "rules_python",
-        sha256 = "c85d5db38d3eac06167a13b10c9dba54b003a986cd4f1ebc00806b74e7c12f06",
-        strip_prefix = "rules_python-1.8.4",
-        urls = tf_mirror_urls("https://github.com/bazelbuild/rules_python/releases/download/1.8.4/rules_python-1.8.4.tar.gz"),
+        sha256 = "8964aa1e7525fea5244ba737458694a057ada1be96a92998a41caa1983562d00",
+        strip_prefix = "rules_python-1.8.5",
+        urls = tf_mirror_urls("https://github.com/bazelbuild/rules_python/releases/download/1.8.5/rules_python-1.8.5.tar.gz"),
         patch_file = [
             "@xla//third_party/py:rules_python_scope.patch",
             "@xla//third_party/py:rules_python_freethreaded.patch",

diff --git a/third_party/xla/.github/workflows/rocm_jax_ut.yml b/third_party/xla/.github/workflows/rocm_jax_ut.yml
@@ -84,5 +84,4 @@ jobs:
             --override_repository=xla=${GITHUB_WORKSPACE} \
             --config=single_gpu \
             --local_test_jobs=4 \
-            --repo_env=TF_ROCM_RBE_DOCKER_IMAGE=${DOCKER_IMAGE} \
-            --crosstool_top=@local_config_rocm//crosstool:toolchain-local
+            --repo_env=TF_ROCM_RBE_DOCKER_IMAGE=${DOCKER_IMAGE}
diff --git a/third_party/xla/MODULE.bazel b/third_party/xla/MODULE.bazel
@@ -25,7 +25,7 @@ bazel_dep(name = "re2", version = "2025-11-05", repo_name = "com_googlesource_co
 bazel_dep(name = "rules_cc", version = "0.2.0")
 bazel_dep(name = "rules_java", version = "8.16.1")
 bazel_dep(name = "rules_license", version = "1.0.0")
-bazel_dep(name = "rules_python", version = "1.8.4")
+bazel_dep(name = "rules_python", version = "1.8.5")
 bazel_dep(name = "rules_shell", version = "0.6.1")
 bazel_dep(name = "snappy", version = "1.2.1")
 bazel_dep(name = "xxd", version = "9.1.0917")
@@ -137,7 +137,7 @@ single_version_override(
         "//third_party/py:rules_python_versions.patch",
         "//third_party/py:rules_python_scope.patch",
     ],
-    version = "1.8.4",
+    version = "1.8.5",
 )
 
 python = use_extension("@rules_python//python/extensions:python.bzl", "python")
@@ -230,9 +230,6 @@ register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64")
 register_toolchains("@rules_ml_toolchain//cc:linux_aarch64_linux_aarch64_cuda")
 
 ### Other local config repos
-local_clang_configure = use_extension("@rules_ml_toolchain//cc/llvms/local:local_clang_configure.bzl", "local_clang_configure_ext")
-use_repo(local_clang_configure, "local_config_clang")
-
 rocm_configure = use_extension("//third_party/extensions:rocm_configure.bzl", "rocm_configure_ext")
 use_repo(rocm_configure, "local_config_rocm")
 

diff --git a/third_party/xla/build_tools/ci/build.py b/third_party/xla/build_tools/ci/build.py
@@ -53,10 +53,6 @@
     "//build_tools/...",
     "@tsl//tsl/...",
 )
-_XLA_ONEAPI_TARGET_PATTERNS = (
-    "//xla/stream_executor/sycl/...",
-    "//xla/service/gpu/...",
-)
 _XLA_CPU_PRESUBMIT_BENCHMARKS_DEFAULT_TARGET_PATTERNS = (
     "//xla/tools/multihost_hlo_runner:hlo_runner_main",
     "//xla/tools:compute_xspace_stats_main",
@@ -512,7 +508,7 @@ def nvidia_gpu_build_with_compute_capability(
         "sycl_hermetic",
         "icpx_clang",
     ),
-    target_patterns=_XLA_ONEAPI_TARGET_PATTERNS,
+    target_patterns=_XLA_DEFAULT_TARGET_PATTERNS,
     build_tag_filters=oneapi_build_tag_filter,
     test_tag_filters=oneapi_test_tag_filter,
     options={**_DEFAULT_BAZEL_OPTIONS, "//xla/tsl:ci_build": True},

diff --git a/third_party/xla/build_tools/ci/golden_commands.txt b/third_party/xla/build_tools/ci/golden_commands.txt
@@ -98,8 +98,8 @@ bazel test --build_tag_filters=-no_oss,requires-gpu-nvidia,-rocm-only,-oneapi-on
 bazel analyze-profile profile.json.gz
 # END BuildType.XLA_LINUX_X86_GPU_L4_GITHUB_ACTIONS
 # BEGIN BuildType.XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS
-parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build --nobuild -- //xla/stream_executor/sycl/... //xla/service/gpu/...
-bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build -- //xla/stream_executor/sycl/... //xla/service/gpu/...
+parallel --ungroup --retries 3 --delay 15 --nonall -- bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build --nobuild -- //xla/... //build_tools/... @tsl//tsl/...
+bazel build --build_tag_filters=oneapi-only,requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --test_tag_filters=oneapi-only,-requires-gpu-intel,-requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-cuda-only,-rocm-only,-no-oneapi --config=nonccl --config=rbe_linux_cpu --config=sycl --config=sycl_hermetic --config=icpx_clang --color=yes --test_output=errors --verbose_failures --keep_going --nobuild_tests_only --profile=profile.json.gz --flaky_test_attempts=3 --jobs=150 --bes_upload_mode=fully_async --//xla/tsl:ci_build -- //xla/... //build_tools/... @tsl//tsl/...
 bazel analyze-profile profile.json.gz
 # END BuildType.XLA_LINUX_X86_GPU_ONEAPI_GITHUB_ACTIONS
 # BEGIN BuildType.XLA_MACOS_ARM64_CPU_KOKORO

diff --git a/third_party/xla/tensorflow.bazelrc b/third_party/xla/tensorflow.bazelrc
@@ -273,26 +273,25 @@ common:asan --copt -g
 common:asan --copt -fno-omit-frame-pointer
 common:asan --linkopt -fsanitize=address
 
+common:rocm_base --config=clang_local
 common:rocm_base --copt=-Wno-gnu-offsetof-extensions
+common:rocm_base --crosstool_top=@local_config_rocm//crosstool:toolchain
 common:rocm_base --define=using_rocm_hipcc=true
 common:rocm_base --define=tensorflow_mkldnn_contraction_kernel=0
 common:rocm_base --repo_env TF_NEED_ROCM=1
-common:rocm_base --action_env=HIPCC_COMPILE_FLAGS_APPEND="--offload-compress"
 
-# ROCm with hermetic clang toolchain
-common:rocm_clang_hermetic --config=rocm_base
-common:rocm_clang_hermetic --extra_toolchains=@local_config_rocm//crosstool:toolchain-linux-x86_64-hermetic
+common:rocm_clang_official --config=rocm_base
+common:rocm_clang_official --action_env=CLANG_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
+common:rocm_clang_official --action_env=HIPCC_COMPILE_FLAGS_APPEND="--offload-compress"
+common:rocm_clang_official --action_env=TF_ROCM_CLANG="1"
+common:rocm_clang_official --linkopt="-fuse-ld=lld"
+common:rocm_clang_official --host_linkopt="-fuse-ld=lld"
 
-# ROCm with local/system clang toolchain
-# CLANG_COMPILER_PATH sets the compiler for the wrapper script
-common:rocm_clang_local --config=rocm_base
-common:rocm_clang_local --extra_toolchains=@local_config_rocm//crosstool:toolchain-linux-x86_64-local
-
-common:rocm --config=rocm_clang_hermetic
+common:rocm --config=rocm_clang_official
 common:rocm_ci --config=rocm
 
 common:rocm_ci_hermetic --dynamic_mode=off
-common:rocm_ci_hermetic --config=rocm_clang_hermetic
+common:rocm_ci_hermetic --config=rocm_clang_official
 common:rocm_ci_hermetic --repo_env="ROCM_DISTRO_VERSION=rocm_7.10.0_gfx90X"
 common:rocm_ci_hermetic --@local_config_rocm//rocm:rocm_path_type=hermetic
 

diff --git a/third_party/xla/third_party/extensions/local_clang_configure.bzl b/third_party/xla/third_party/extensions/local_clang_configure.bzl