@@ -487,6 +487,111 @@ def test_scope_based_naming_avoids_collisions(self):
487487 assert len (proto_descriptor .nested_type ) == 1
488488 assert proto_descriptor .nested_type [0 ].name == "root__my_record"
489489
def test_field_name_sanitization(self):
    """Check the proto field name generated for each kind of awkward column name.

    Builds a schema of STRING columns from the raw names below, converts it
    with ``schema.table_schema_to_proto_descriptor`` and compares the name of
    the descriptor field at the same position against the expected value.
    """
    # (raw column name, name expected on the generated descriptor field)
    cases = [
        # Hyphens replaced with underscores
        ("field-with-hyphens", "field_with_hyphens"),
        # Spaces replaced with underscores
        ("field with spaces", "field_with_spaces"),
        # Field starting with digit gets prepended underscore
        ("123field", "_123field"),
        # Special characters replaced with underscores
        ("field@special#chars", "field_special_chars"),
        # Valid field names are lowercased
        ("ValidField", "validfield"),
    ]

    string_type = types.TableFieldSchema.Type.STRING
    columns = [
        types.TableFieldSchema(name=raw_name, type_=string_type)
        for raw_name, _ in cases
    ]
    table_schema = types.TableSchema(fields=columns)

    proto_descriptor = schema.table_schema_to_proto_descriptor(table_schema)

    # Descriptor fields are compared positionally, in schema order.
    for position, (_, expected_name) in enumerate(cases):
        assert proto_descriptor.field[position].name == expected_name
533+
def test_field_name_sanitization_in_nested_structs(self):
    """Check sanitized names on a STRUCT column, its nested type and its children.

    The schema has a single STRUCT column ("outer-struct") holding a STRING
    child ("inner-field") and an INT64 child ("123inner").
    """
    field_type = types.TableFieldSchema.Type
    inner_columns = [
        types.TableFieldSchema(name="inner-field", type_=field_type.STRING),
        types.TableFieldSchema(name="123inner", type_=field_type.INT64),
    ]
    struct_column = types.TableFieldSchema(
        name="outer-struct",
        type_=field_type.STRUCT,
        fields=inner_columns,
    )
    table_schema = types.TableSchema(fields=[struct_column])

    proto_descriptor = schema.table_schema_to_proto_descriptor(table_schema)

    # The top-level field carries the sanitized name and refers to the
    # scope-prefixed nested type.
    struct_field = proto_descriptor.field[0]
    assert struct_field.name == "outer_struct"
    assert struct_field.type_name == "root__outer_struct"

    # The nested message type uses the same scope-prefixed, sanitized name.
    struct_message = proto_descriptor.nested_type[0]
    assert struct_message.name == "root__outer_struct"

    # Children of the struct are sanitized as well, in declaration order.
    for position, expected_name in enumerate(("inner_field", "_123inner")):
        assert struct_message.field[position].name == expected_name
569+
def test_field_name_sanitization_in_range_fields(self):
    """Check sanitized names on a RANGE column and on its nested type.

    The schema has a single RANGE column ("date-range") whose element type
    is DATE.
    """
    field_type = types.TableFieldSchema.Type
    element_type = types.TableFieldSchema.FieldElementType(type_=field_type.DATE)
    range_column = types.TableFieldSchema(
        name="date-range",
        type_=field_type.RANGE,
        range_element_type=element_type,
    )
    table_schema = types.TableSchema(fields=[range_column])

    proto_descriptor = schema.table_schema_to_proto_descriptor(table_schema)

    # The field's type reference and the nested type's own name are expected
    # to be the same scope-prefixed, sanitized string.
    expected_type_name = "root__date_range"

    range_field = proto_descriptor.field[0]
    assert range_field.name == "date_range"
    assert range_field.type_name == expected_type_name

    range_message = proto_descriptor.nested_type[0]
    assert range_message.name == expected_type_name
594+
490595
if __name__ == "__main__":
    # Running this file directly hands it to pytest as the only test target.
    pytest.main(args=[__file__])
0 commit comments