Skip to content

Commit 5122477

Browse files
committed
Writer-side cleanup: unify AppendingStringWriter, qualify ::arrow namespace
Merge StringLikeWriter and EnumWriter into a single AppendingStringWriter template. Qualify arrow:: as ::arrow:: consistently across writer headers and sources. Standardize include guards.

Signed-off-by: Arham Chopra <arham.chopra@cubistsystematic.com>
1 parent e5f6dca commit 5122477

23 files changed

Lines changed: 143 additions & 170 deletions

cpp/csp/adapters/arrow/ArrowFieldWriter.cpp

Lines changed: 32 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ namespace csp::adapters::arrow
1818
#define ARROW_OK_OR_THROW( expr, msg ) \
1919
do { auto _st = ( expr ); if( !_st.ok() ) CSP_THROW( RuntimeException, msg << ": " << _st.ToString() ); } while(0)
2020

21-
// --- Base class default implementations ---
2221

2322
void FieldWriter::reserve( int64_t numRows )
2423
{
@@ -54,7 +53,6 @@ std::vector<std::shared_ptr<::arrow::Array>> FieldWriter::finish()
5453
namespace
5554
{
5655

57-
// --- Generic lambda-based writer for fixed-length types ---
5856
// ValueFn signature: auto(const Struct *) — returns the value to UnsafeAppend/Append.
5957
// Covers: all numeric primitives, bool, DateTime, TimeDelta, Time, Date.
6058

@@ -123,16 +121,17 @@ CreatedFieldWriter makeNanosWriter( const std::string & name, const StructFieldP
123121
} );
124122
}
125123

126-
// --- String / Bytes writer (variable-length: needs safe Append) ---
127124

128-
template<typename ArrowBuilderT>
129-
class StringLikeWriter final : public FieldWriter
125+
// Appending string writer: covers both StringLikeWriter and EnumWriter via ExtractFn.
126+
// ExtractFn signature: const char* (const Struct*, size_t& len)
127+
template<typename ArrowBuilderT, typename ExtractFn>
128+
class AppendingStringWriter final : public FieldWriter
130129
{
131130
public:
132-
StringLikeWriter( const std::string & columnName, const StructFieldPtr & field,
133-
std::shared_ptr<::arrow::DataType> dataType )
131+
AppendingStringWriter( const std::string & columnName, const StructFieldPtr & field,
132+
std::shared_ptr<::arrow::DataType> dataType, ExtractFn fn )
134133
: FieldWriter( columnName, field, std::make_shared<ArrowBuilderT>(), std::move( dataType ) ),
135-
m_typedBuilder( static_cast<ArrowBuilderT *>( m_builder.get() ) ) {}
134+
m_typedBuilder( static_cast<ArrowBuilderT *>( m_builder.get() ) ), m_fn( std::move( fn ) ) {}
136135

137136
void writeAll( const std::vector<StructPtr> & structs, int64_t offset, int64_t count ) override
138137
{
@@ -141,8 +140,9 @@ class StringLikeWriter final : public FieldWriter
141140
const Struct * s = structs[i].get();
142141
if( m_field -> isSet( s ) )
143142
{
144-
auto & val = m_field -> value<std::string>( s );
145-
ARROW_OK_OR_THROW( m_typedBuilder -> Append( val.c_str(), val.length() ), "Failed to append string/bytes" );
143+
size_t len;
144+
auto * data = m_fn( s, len );
145+
ARROW_OK_OR_THROW( m_typedBuilder -> Append( data, len ), "Failed to append value" );
146146
}
147147
else
148148
ARROW_OK_OR_THROW( m_typedBuilder -> AppendNull(), "Failed to append null" );
@@ -152,50 +152,16 @@ class StringLikeWriter final : public FieldWriter
152152
protected:
153153
void doWrite( const Struct * s ) override
154154
{
155-
auto & val = m_field -> value<std::string>( s );
156-
ARROW_OK_OR_THROW( m_typedBuilder -> Append( val.c_str(), val.length() ), "Failed to append string/bytes" );
155+
size_t len;
156+
auto * data = m_fn( s, len );
157+
ARROW_OK_OR_THROW( m_typedBuilder -> Append( data, len ), "Failed to append value" );
157158
}
158159

159160
private:
160161
ArrowBuilderT * m_typedBuilder;
162+
ExtractFn m_fn;
161163
};
162164

163-
// --- Enum writer (variable-length string: CspEnum → name()) ---
164-
165-
class EnumWriter final : public FieldWriter
166-
{
167-
public:
168-
EnumWriter( const std::string & columnName, const StructFieldPtr & field )
169-
: FieldWriter( columnName, field, std::make_shared<::arrow::StringBuilder>(), ::arrow::utf8() ),
170-
m_typedBuilder( static_cast<::arrow::StringBuilder *>( m_builder.get() ) ) {}
171-
172-
void writeAll( const std::vector<StructPtr> & structs, int64_t offset, int64_t count ) override
173-
{
174-
for( int64_t i = offset; i < offset + count; ++i )
175-
{
176-
const Struct * s = structs[i].get();
177-
if( m_field -> isSet( s ) )
178-
{
179-
auto & n = m_field -> value<CspEnum>( s ).name();
180-
ARROW_OK_OR_THROW( m_typedBuilder -> Append( n.c_str(), n.length() ), "Failed to append enum" );
181-
}
182-
else
183-
ARROW_OK_OR_THROW( m_typedBuilder -> AppendNull(), "Failed to append null" );
184-
}
185-
}
186-
187-
protected:
188-
void doWrite( const Struct * s ) override
189-
{
190-
auto & n = m_field -> value<CspEnum>( s ).name();
191-
ARROW_OK_OR_THROW( m_typedBuilder -> Append( n.c_str(), n.length() ), "Failed to append enum" );
192-
}
193-
194-
private:
195-
::arrow::StringBuilder * m_typedBuilder;
196-
};
197-
198-
// --- Nested struct writer (recursive) ---
199165

200166
class NestedStructWriter final : public FieldWriter
201167
{
@@ -262,7 +228,6 @@ class NestedStructWriter final : public FieldWriter
262228
std::vector<std::unique_ptr<FieldWriter>> m_childWriters;
263229
};
264230

265-
// --- Factory helpers ---
266231

267232
bool isBytesField( const StructFieldPtr & field )
268233
{
@@ -290,7 +255,6 @@ CreatedFieldWriter createFieldWriter(
290255

291256
switch( f -> type() -> type() )
292257
{
293-
// --- Numeric ---
294258
case CspType::Type::BOOL:
295259
{
296260
auto b = std::make_shared<::arrow::BooleanBuilder>();
@@ -307,15 +271,28 @@ CreatedFieldWriter createFieldWriter(
307271
case CspType::Type::UINT64: return makePrimitiveWriter<uint64_t, ::arrow::UInt64Builder>( columnName, f );
308272
case CspType::Type::DOUBLE: return makePrimitiveWriter<double, ::arrow::DoubleBuilder>( columnName, f );
309273

310-
// --- String / Bytes ---
311274
case CspType::Type::STRING:
275+
{
276+
auto extractor = [f]( const Struct * s, size_t & len ) -> const char * {
277+
auto & val = f -> value<std::string>( s );
278+
len = val.length();
279+
return val.c_str();
280+
};
312281
if( isBytesField( f ) )
313-
return makeWriter<StringLikeWriter<::arrow::BinaryBuilder>>( columnName, f, ::arrow::binary() );
314-
return makeWriter<StringLikeWriter<::arrow::StringBuilder>>( columnName, f, ::arrow::utf8() );
282+
return makeWriter<AppendingStringWriter<::arrow::BinaryBuilder, decltype(extractor)>>( columnName, f, ::arrow::binary(), extractor );
283+
return makeWriter<AppendingStringWriter<::arrow::StringBuilder, decltype(extractor)>>( columnName, f, ::arrow::utf8(), extractor );
284+
}
315285

316-
case CspType::Type::ENUM: return makeWriter<EnumWriter>( columnName, f );
286+
case CspType::Type::ENUM:
287+
{
288+
auto extractor = [f]( const Struct * s, size_t & len ) -> const char * {
289+
auto & n = f -> value<CspEnum>( s ).name();
290+
len = n.length();
291+
return n.c_str();
292+
};
293+
return makeWriter<AppendingStringWriter<::arrow::StringBuilder, decltype(extractor)>>( columnName, f, ::arrow::utf8(), extractor );
294+
}
317295

318-
// --- Temporal ---
319296
case CspType::Type::DATETIME:
320297
return makeNanosWriter<DateTime, ::arrow::TimestampBuilder>(
321298
columnName, f, std::make_shared<::arrow::TimestampType>( ::arrow::TimeUnit::NANO, "UTC" ) );
@@ -326,7 +303,6 @@ CreatedFieldWriter createFieldWriter(
326303
return makeNanosWriter<Time, ::arrow::Time64Builder>(
327304
columnName, f, std::make_shared<::arrow::Time64Type>( ::arrow::TimeUnit::NANO ) );
328305

329-
// --- Date (days since epoch) ---
330306
case CspType::Type::DATE:
331307
{
332308
auto b = std::make_shared<::arrow::Date32Builder>();
@@ -336,7 +312,6 @@ CreatedFieldWriter createFieldWriter(
336312
} );
337313
}
338314

339-
// --- Nested struct ---
340315
case CspType::Type::STRUCT:
341316
{
342317
auto nestedMeta = std::static_pointer_cast<const CspStructType>( f -> type() ) -> meta();
@@ -373,7 +348,6 @@ CreatedFieldWriter createFieldWriter(
373348

374349
#undef ARROW_OK_OR_THROW
375350

376-
// --- List field writer factory ---
377351

378352
static ListFieldWriterFactory s_listFieldWriterFactory;
379353

cpp/csp/adapters/arrow/ArrowFieldWriter.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ CreatedFieldWriter createFieldWriter(
101101
const StructFieldPtr & structField
102102
);
103103

104-
// --- List field writer factory (dependency injection for numpy list support) ---
105104

106105
// A function that writes a DialectGenericType (e.g. numpy array) into an arrow list builder.
107106
// The function handles list-start, element conversion, etc.
@@ -122,4 +121,4 @@ std::pair<std::shared_ptr<::arrow::ArrayBuilder>, ListItemsWriter> createListFie
122121

123122
}
124123

125-
#endif
124+
#endif // _IN_CSP_ADAPTERS_ARROW_ArrowFieldWriter_H

cpp/csp/adapters/arrow/RecordBatchToStruct.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,4 @@ class RecordBatchToStructConverter
4949

5050
}
5151

52-
#endif
52+
#endif // _IN_CSP_ADAPTERS_ARROW_RecordBatchToStruct_H

cpp/csp/adapters/arrow/StructToRecordBatch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,4 @@ class StructToRecordBatchConverter
4747

4848
}
4949

50-
#endif
50+
#endif // _IN_CSP_ADAPTERS_ARROW_StructToRecordBatch_H

cpp/csp/adapters/parquet/ArrowIPCFileWriterWrapper.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ void ArrowIPCFileWriterWrapper::openImpl( const std::string &fileName, const std
1313

1414
PARQUET_ASSIGN_OR_THROW(
1515
m_outputStream,
16-
arrow::io::FileOutputStream::Open( fileName.c_str()));
16+
::arrow::io::FileOutputStream::Open( fileName.c_str()));
1717

18-
arrow::ipc::IpcWriteOptions writeOptions;
18+
::arrow::ipc::IpcWriteOptions writeOptions;
1919
writeOptions.codec = resolveCompressionCodec( compression );
2020

2121
STATUS_OK_OR_THROW_RUNTIME(

cpp/csp/adapters/parquet/ArrowIPCFileWriterWrapper.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,4 @@ class ArrowIPCFileWriterWrapper : public FileWriterWrapper
4141
}
4242

4343

44-
#endif
44+
#endif // _IN_CSP_ADAPTERS_PARQUET_ArrowIPCFileWriterWrapper_H

0 commit comments

Comments
 (0)