Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
c84dd45
Automated Code Change
tensorflower-gardener Apr 1, 2026
cd712c8
Always populate target config in GpuTopology created in PjRt client
beckerhe Apr 1, 2026
eb82606
Automated Code Change
tensorflower-gardener Apr 1, 2026
528828c
[XLA:GPU] Prevent collective memory from being destructed until the m…
PatriosTheGreat Apr 1, 2026
7bee043
PR #40180: [XLA:GPU][oneAPI] Remove context activation and update SYC…
bhavani-subramanian Apr 1, 2026
4c8cdd0
Move gpu_device_info_test to target_config directory.
beckerhe Apr 1, 2026
7cbbb19
Fix scan rewriter to handle broadcasted initial values and correct la…
chsigg Apr 1, 2026
ade7baa
PR #33269: Addin a knob to control the limitation of async-compute re…
mingxu1067 Apr 1, 2026
614e49d
Automated Code Change
tensorflower-gardener Apr 1, 2026
d2c0496
PR #39601: [xla:gpu] Add support for dynamically sized packed kernel …
ezhulenev Apr 1, 2026
a885b31
Automated Code Change
tensorflower-gardener Apr 1, 2026
f2c5588
Move utility for in/outliner into shardy. part 2.
ekayaaslan Apr 1, 2026
c6d844d
PR #39854: [ROCm] Use hermetic clang for rocm
alekstheod Apr 1, 2026
c81cd43
[XLA:GPU]: Unify collective codegen checks in one place.
sohaibiftikhar Apr 1, 2026
1b47788
[XLA:GPU] Actually use `ConcurrentRegionsHloOrdering` for command buf…
thomasjoerg Apr 1, 2026
97d041f
[XLA:GPU] Sort the tiled instructions taking RT variables into account.
pifon2a Apr 1, 2026
17a6cf8
Run shardy/google/integrate_latest.sh to integrate.
ekayaaslan Apr 1, 2026
0fc4edc
[XLA:GPU] Remove NCCL from dependencies if no_nccl definition is prov…
PatriosTheGreat Apr 1, 2026
faf37bf
Use utility to walk on call graphs.
ekayaaslan Apr 1, 2026
566ae3a
PR #39725: [xla] Add global per-process hang watchdog
ezhulenev Apr 1, 2026
e65ba7e
[XLA:GPU] Mark NVSHMEM thunks as deprecated.
PatriosTheGreat Apr 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions tensorflow/core/kernels/split_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class SplitOpBase : public OpKernel {
context, split_dim_tensor.shape().dims() == 0,
errors::InvalidArgument("split_dim must be a scalar but has rank ",
split_dim_tensor.shape().dims()));
const int32_t split_dim_orig = split_dim_tensor.flat<int32>()(0);
const int32_t split_dim_orig = split_dim_tensor.flat<int32_t>()(0);
const int32_t split_dim =
split_dim_orig < 0 ? split_dim_orig + input.dims() : split_dim_orig;
const int32_t num_split = num_outputs();
Expand Down Expand Up @@ -210,7 +210,7 @@ class SplitOpCPU : public SplitOpBase<CPUDevice, T> {
const int32_t num_split = Base::num_outputs();
const Tensor& input = context->input(1);
const TensorShape& input_shape = input.shape();
const int32_t split_dim_orig = context->input(0).flat<int32>()(0);
const int32_t split_dim_orig = context->input(0).flat<int32_t>()(0);
const int32_t split_dim =
split_dim_orig < 0 ? split_dim_orig + input.dims() : split_dim_orig;

Expand Down Expand Up @@ -280,20 +280,20 @@ class SplitOpGPU : public SplitOpBase<GPUDevice, T> {
}
const Tensor& input = context->input(1);
const TensorShape& input_shape = input.shape();
const int32_t split_dim_orig = context->input(0).flat<int32>()(0);
const int32_t split_dim_orig = context->input(0).flat<int32_t>()(0);
const int32_t split_dim =
split_dim_orig < 0 ? split_dim_orig + input.dims() : split_dim_orig;
const int32_t num_split = Base::num_outputs();
OP_REQUIRES(
context,
FastBoundsCheck(input.NumElements(), std::numeric_limits<int32>::max()),
errors::InvalidArgument("Split on GPU requires input size "
"< max int32"));
OP_REQUIRES(context,
FastBoundsCheck(input.NumElements(),
std::numeric_limits<int32_t>::max()),
errors::InvalidArgument("Split on GPU requires input size "
"< max int32"));
int32_t prefix_dim_size;
int32_t split_dim_size;
int32_t suffix_dim_size;
std::tie(prefix_dim_size, split_dim_size, suffix_dim_size) =
Base::template SetDims<int32>(input_shape, split_dim);
Base::template SetDims<int32_t>(input_shape, split_dim);

const int32_t split_dim_output_size = split_dim_size / num_split;
TensorShape output_shape(input_shape);
Expand Down
15 changes: 8 additions & 7 deletions tensorflow/core/kernels/transpose_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -224,17 +224,18 @@ TF_CALL_float8_e5m2(REGISTER) TF_CALL_float8_e4m3fn(REGISTER)
#undef REGISTER

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
Status TransposeGpuOp::DoTranspose(OpKernelContext* ctx, const Tensor& in,
gtl::ArraySlice<int32> perm,
Tensor* out) {
absl::Status TransposeGpuOp::DoTranspose(OpKernelContext* ctx,
const Tensor& in,
absl::Span<const int32> perm,
Tensor* out) {
typedef Eigen::GpuDevice GPUDevice;
return ::tensorflow::DoTranspose(ctx->eigen_device<GPUDevice>(), in, perm,
out);
}
Status ConjugateTransposeGpuOp::DoTranspose(OpKernelContext* ctx,
const Tensor& in,
gtl::ArraySlice<int32> perm,
Tensor* out) {
absl::Status ConjugateTransposeGpuOp::DoTranspose(OpKernelContext* ctx,
const Tensor& in,
absl::Span<const int32> perm,
Tensor* out) {
typedef Eigen::GpuDevice GPUDevice;
return ::tensorflow::DoConjugateTranspose(ctx->eigen_device<GPUDevice>(), in,
perm, out);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ void ReluTestImpl() {
model.ApplyDelegateAndInvoke();

EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 3}));
EXPECT_THAT(model.GetDequantizedOutput<integer_type>(),
ElementsAreArray(
ArrayFloatNear({1.0, 5.0, 6.0}, /*max_abs_error=*/0.03)));
EXPECT_THAT(
model.GetDequantizedOutput<integer_type>(),
ElementsAreArray(ArrayFloatNear({1.0, 5.0, 6.0}, /*max_abs_err=*/0.03)));
}

template <typename integer_type, TensorType tensor_dtype>
Expand All @@ -108,9 +108,9 @@ void Relu6TestImpl() {
model.ApplyDelegateAndInvoke();

EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 3}));
EXPECT_THAT(model.GetDequantizedOutput<integer_type>(),
ElementsAreArray(
ArrayFloatNear({4.0, 0.0, 6.0}, /*max_abs_error=*/0.03)));
EXPECT_THAT(
model.GetDequantizedOutput<integer_type>(),
ElementsAreArray(ArrayFloatNear({4.0, 0.0, 6.0}, /*max_abs_err=*/0.03)));
}

template <typename integer_type, TensorType tensor_dtype>
Expand All @@ -127,7 +127,7 @@ void TanhTestImpl() {
EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 3}));
EXPECT_THAT(model.GetDequantizedOutput<integer_type>(),
ElementsAreArray(ArrayFloatNear({1.00392, -0.752941, 1.00392},
/*max_abs_error=*/0.03)));
/*max_abs_err=*/0.03)));
}

template <typename integer_type, TensorType tensor_dtype>
Expand All @@ -150,7 +150,7 @@ void SigmoidTestImpl() {
EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 3}));
EXPECT_THAT(model.GetDequantizedOutput<integer_type>(),
ElementsAreArray(ArrayFloatNear({0.977, 0.266, 0.996},
/*max_abs_error=*/0.03)));
/*max_abs_err=*/0.03)));
}

TEST(ActivationOpModel, ReluOutput_UInt8) {
Expand Down
62 changes: 52 additions & 10 deletions tensorflow/lite/delegates/hexagon/builders/tests/concat_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,24 @@ void FourInputsQuantizedSameRangeImpl() {
EXPECT_THAT(m0.GetDequantizedOutput<integer_type>(),
ElementsAreArray(ArrayFloatNear(
{
1.0f, 3.0f, 1.1f, 3.1f, 1.2f, 3.2f, 1.3f, 3.3f, //
4.0f, 7.0f, 4.1f, 7.1f, 4.2f, 7.2f, 4.3f, 7.3f, //
1.0f,
3.0f,
1.1f,
3.1f,
1.2f,
3.2f,
1.3f,
3.3f, //
4.0f,
7.0f,
4.1f,
7.1f,
4.2f,
7.2f,
4.3f,
7.3f, //
},
/*max_abs_error=*/0.2)));
/*max_abs_err=*/0.2)));
}

TEST(QuantizedConcatenationOpModel, FourInputsQuantizedSameRange_UInt8) {
Expand Down Expand Up @@ -167,10 +181,24 @@ TEST(QuantizedConcatenationOpModel, FourInputsQuantizedMixedRange) {
EXPECT_THAT(m0.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear(
{
1.0f, 3.0f, 1.1f, 3.1f, 1.2f, 3.2f, 1.3f, 3.3f, //
4.0f, 7.0f, 4.1f, 7.1f, 4.2f, 7.2f, 4.3f, 7.3f, //
1.0f,
3.0f,
1.1f,
3.1f,
1.2f,
3.2f,
1.3f,
3.3f, //
4.0f,
7.0f,
4.1f,
7.1f,
4.2f,
7.2f,
4.3f,
7.3f, //
},
/*max_abs_error=*/0.2)));
/*max_abs_err=*/0.2)));
}

TEST(QuantizedConcatenationOpModel, FourInputsAxis2_UInt8) {
Expand All @@ -189,10 +217,24 @@ TEST(QuantizedConcatenationOpModel, FourInputsAxis2_UInt8) {
EXPECT_THAT(m0.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear(
{
1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, //
-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, //
1.0f,
-1.0f,
1.0f,
1.0f,
1.0f,
-1.0f,
1.0f,
1.0f, //
-1.0f,
-1.0f,
1.0f,
1.0f,
-1.0f,
1.0f,
1.0f,
1.0f, //
},
/*max_abs_error=*/0.2)));
/*max_abs_err=*/0.2)));
}

// If the input min/max (across all tensors) is same as the output min/max,
Expand Down Expand Up @@ -238,7 +280,7 @@ TEST(QuantizedConcatenationOpModel, FourInputsQuantizedMixedRange_LargeData) {
m0.ApplyDelegateAndInvoke();
EXPECT_THAT(m0.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear(reference_output,
/*max_abs_error=*/0.1)));
/*max_abs_err=*/0.1)));
}

} // namespace tflite
4 changes: 2 additions & 2 deletions tensorflow/lite/delegates/hexagon/builders/tests/neg_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ TEST(NegOpModel, NegTest_UInt8) {
EXPECT_THAT(
m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear({2.0f, 1.0f, 0.f, -1.0f, -2.0f, -3.0f},
/*max_abs_error=*/0.05)));
/*max_abs_err=*/0.05)));
}

TEST(NegOpModel, NegTest_Int8) {
Expand All @@ -68,7 +68,7 @@ TEST(NegOpModel, NegTest_Int8) {
EXPECT_THAT(
m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(ArrayFloatNear({2.0f, 1.0f, 0.f, -1.0f, -2.0f, -3.0f},
/*max_abs_error=*/0.05)));
/*max_abs_err=*/0.05)));
}

} // namespace tflite
Loading
Loading