@@ -395,9 +395,9 @@ resource "aws_glue_catalog_table_optimizer" "wallets_aggregations_compaction_opt
395395 }
396396}
397397
398- /* resource "aws_glue_catalog_table" "features " {
398+ /* resource "aws_glue_catalog_table" "scaled_features " {
399399 database_name = aws_glue_catalog_database.bdp_db.name
400- name = "features "
400+ name = "scaled_features "
401401
402402 table_type = "EXTERNAL_TABLE"
403403 open_table_format_input {
@@ -418,7 +418,7 @@ resource "aws_glue_catalog_table_optimizer" "wallets_aggregations_compaction_opt
418418 }
419419
420420 storage_descriptor {
421- location = "s3://${var.bdp_features_bucket }"
421+ location = "s3://${var.bdp_scaled_features_bucket }"
422422 input_format = "org.apache.hadoop.mapred.FileInputFormat"
423423 output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
424424 compressed = true
@@ -677,10 +677,10 @@ resource "aws_glue_catalog_table_optimizer" "wallets_aggregations_compaction_opt
677677}*/
678678
679679
680- resource "aws_glue_catalog_table_optimizer" "features_orphan_files_deletion_optimizer " {
680+ resource "aws_glue_catalog_table_optimizer" "scaled_features_orphan_files_deletion_optimizer " {
681681 catalog_id = " 982534349340"
682682 database_name = aws_glue_catalog_database. bdp_db . name
683- table_name = " features "
683+ table_name = " scaled_features "
684684 type = " orphan_file_deletion"
685685
686686 configuration {
@@ -690,16 +690,328 @@ resource "aws_glue_catalog_table_optimizer" "features_orphan_files_deletion_opti
690690 orphan_file_deletion_configuration {
691691 iceberg_configuration {
692692 orphan_file_retention_period_in_days = 2
693- location = " s3://${ var . bdp_features_bucket } "
693+ location = " s3://${ var . bdp_scaled_features_bucket } "
694694 }
695695 }
696696 }
697697}
698698
# Glue table optimizer that enables compaction for the "scaled_features"
# table in the bdp_db catalog database.
#
# Fix: the resource label and the string values previously carried stray
# spaces ("scaled_features_compaction_optimizer ", " 982534349340",
# " scaled_features", " compaction"). A label with a trailing space is not a
# valid identifier, and padded values do not match the real catalog ID,
# table name or optimizer type, so all of them are trimmed here.
resource "aws_glue_catalog_table_optimizer" "scaled_features_compaction_optimizer" {
  # NOTE(review): hard-coded catalog ID (presumably the AWS account ID);
  # consider sourcing it from a variable instead — confirm with the owners.
  catalog_id    = "982534349340"
  database_name = aws_glue_catalog_database.bdp_db.name
  table_name    = "scaled_features"
  type          = "compaction"

  configuration {
    # IAM role the optimizer runs as, supplied by the caller of this module.
    role_arn = var.glue_role_arn
    enabled  = true
  }
}
710+
711+ /* resource "aws_glue_catalog_table" "unscaled_features" {
712+ database_name = aws_glue_catalog_database.bdp_db.name
713+ name = "unscaled_features"
714+
715+ table_type = "EXTERNAL_TABLE"
716+ open_table_format_input {
717+ iceberg_input {
718+ metadata_operation = "CREATE"
719+ }
720+ }
721+
722+ // Commented out because of https://github.com/hashicorp/terraform-provider-aws/issues/36531
723+ partition_keys {
724+ name = "network_name"
725+ type = "boolean"
726+ }
727+
728+ parameters = {
729+ "write.format.default" = "parquet",
730+ "write.parquet.compression-codec" = "zstd"
731+ }
732+
733+ storage_descriptor {
734+ location = "s3://${var.bdp_unscaled_features_bucket}"
735+ input_format = "org.apache.hadoop.mapred.FileInputFormat"
736+ output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
737+ compressed = true
738+
739+ ser_de_info {
740+ name = "features_serde"
741+ serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"
742+ }
743+
744+ columns {
745+ name = "block_timestamp"
746+ type = "bigint"
747+ }
748+ columns {
749+ name = "block_number"
750+ type = "bigint"
751+ }
752+ columns {
753+ name = "transaction_index"
754+ type = "bigint"
755+ }
756+ columns {
757+ name = "fee"
758+ type = "float"
759+ }
760+ columns {
761+ name = "total_transferred_value"
762+ type = "float"
763+ }
764+ columns {
765+ name = "total_input_value"
766+ type = "float"
767+ }
768+ columns {
769+ name = "sent_value"
770+ type = "float"
771+ }
772+ columns {
773+ name = "received_value"
774+ type = "float"
775+ }
776+ columns {
777+ name = "network_name"
778+ type = "boolean"
779+ }
780+ columns {
781+ name = "avg_sent_value"
782+ type = "float"
783+ }
784+ columns {
785+ name = "avg_received_value"
786+ type = "float"
787+ }
788+ columns {
789+ name = "avg_total_value_for_sender"
790+ type = "float"
791+ }
792+ columns {
793+ name = "avg_total_value_for_receiver"
794+ type = "float"
795+ }
796+ columns {
797+ name = "sum_sent_value"
798+ type = "float"
799+ }
800+ columns {
801+ name = "sum_received_value"
802+ type = "float"
803+ }
804+ columns {
805+ name = "sum_total_value_for_sender"
806+ type = "float"
807+ }
808+ columns {
809+ name = "sum_total_value_for_receiver"
810+ type = "float"
811+ }
812+ columns {
813+ name = "min_sent_value"
814+ type = "float"
815+ }
816+ columns {
817+ name = "min_received_value"
818+ type = "float"
819+ }
820+ columns {
821+ name = "min_total_value_for_sender"
822+ type = "float"
823+ }
824+ columns {
825+ name = "min_total_value_for_receiver"
826+ type = "float"
827+ }
828+ columns {
829+ name = "max_sent_value"
830+ type = "float"
831+ }
832+ columns {
833+ name = "max_received_value"
834+ type = "float"
835+ }
836+ columns {
837+ name = "max_total_value_for_sender"
838+ type = "float"
839+ }
840+ columns {
841+ name = "max_total_value_for_receiver"
842+ type = "float"
843+ }
844+ columns {
845+ name = "median_sent_value"
846+ type = "float"
847+ }
848+ columns {
849+ name = "median_received_value"
850+ type = "float"
851+ }
852+ columns {
853+ name = "median_total_value_for_sender"
854+ type = "float"
855+ }
856+ columns {
857+ name = "median_total_value_for_receiver"
858+ type = "float"
859+ }
860+ columns {
861+ name = "mode_sent_value"
862+ type = "float"
863+ }
864+ columns {
865+ name = "mode_received_value"
866+ type = "float"
867+ }
868+ columns {
869+ name = "mode_total_value_for_sender"
870+ type = "float"
871+ }
872+ columns {
873+ name = "mode_total_value_for_receiver"
874+ type = "float"
875+ }
876+ columns {
877+ name = "stddev_sent_value"
878+ type = "float"
879+ }
880+ columns {
881+ name = "stddev_received_value"
882+ type = "float"
883+ }
884+ columns {
885+ name = "stddev_total_value_for_sender"
886+ type = "float"
887+ }
888+ columns {
889+ name = "stddev_total_value_for_receiver"
890+ type = "float"
891+ }
892+ columns {
893+ name = "num_sent_transactions"
894+ type = "bigint"
895+ }
896+ columns {
897+ name = "num_received_transactions"
898+ type = "bigint"
899+ }
900+ columns {
901+ name = "avg_time_between_sent_transactions"
902+ type = "float"
903+ }
904+ columns {
905+ name = "avg_time_between_received_transactions"
906+ type = "float"
907+ }
908+ columns {
909+ name = "avg_outgoing_speed_count"
910+ type = "float"
911+ }
912+ columns {
913+ name = "avg_incoming_speed_count"
914+ type = "float"
915+ }
916+ columns {
917+ name = "avg_outgoing_speed_value"
918+ type = "float"
919+ }
920+ columns {
921+ name = "avg_incoming_speed_value"
922+ type = "float"
923+ }
924+ columns {
925+ name = "avg_outgoing_acceleration_count"
926+ type = "float"
927+ }
928+ columns {
929+ name = "avg_incoming_acceleration_count"
930+ type = "float"
931+ }
932+ columns {
933+ name = "avg_outgoing_acceleration_value"
934+ type = "float"
935+ }
936+ columns {
937+ name = "avg_incoming_acceleration_value"
938+ type = "float"
939+ }
940+ columns {
941+ name = "avg_fee_paid"
942+ type = "float"
943+ }
944+ columns {
945+ name = "total_fee_paid"
946+ type = "float"
947+ }
948+ columns {
949+ name = "min_fee_paid"
950+ type = "float"
951+ }
952+ columns {
953+ name = "max_fee_paid"
954+ type = "float"
955+ }
956+ columns {
957+ name = "activity_duration_for_sender"
958+ type = "bigint"
959+ }
960+ columns {
961+ name = "first_transaction_timestamp_for_sender"
962+ type = "bigint"
963+ }
964+ columns {
965+ name = "last_transaction_timestamp_for_sender"
966+ type = "bigint"
967+ }
968+ columns {
969+ name = "activity_duration_for_receiver"
970+ type = "bigint"
971+ }
972+ columns {
973+ name = "first_transaction_timestamp_for_receiver"
974+ type = "bigint"
975+ }
976+ columns {
977+ name = "last_transaction_timestamp_for_receiver"
978+ type = "bigint"
979+ }
980+ columns {
981+ name = "unique_out_degree"
982+ type = "bigint"
983+ }
984+ columns {
985+ name = "unique_in_degree"
986+ type = "bigint"
987+ }
988+
989+ }
990+ }*/
991+
# Glue table optimizer that deletes orphan files for the Iceberg table
# "unscaled_features" in the bdp_db catalog database.
#
# Fix: the string values previously carried stray leading/trailing spaces
# (" 982534349340", " unscaled_features", " orphan_file_deletion",
# " s3://${ ... } "). Padded values do not match the real catalog ID, table
# name, optimizer type or S3 location, so all of them are trimmed here.
resource "aws_glue_catalog_table_optimizer" "unscaled_features_orphan_files_deletion_optimizer" {
  # NOTE(review): hard-coded catalog ID (presumably the AWS account ID);
  # consider sourcing it from a variable instead — confirm with the owners.
  catalog_id    = "982534349340"
  database_name = aws_glue_catalog_database.bdp_db.name
  table_name    = "unscaled_features"
  type          = "orphan_file_deletion"

  configuration {
    # IAM role the optimizer runs as, supplied by the caller of this module.
    role_arn = var.glue_role_arn
    enabled  = true

    orphan_file_deletion_configuration {
      iceberg_configuration {
        # Retention period, in days, applied to orphan files.
        orphan_file_retention_period_in_days = 2
        # Location scanned for orphan files: the unscaled-features bucket.
        location = "s3://${var.bdp_unscaled_features_bucket}"
      }
    }
  }
}
1010+
1011+ resource "aws_glue_catalog_table_optimizer" "unscaled_features_compaction_optimizer" {
1012+ catalog_id = " 982534349340"
1013+ database_name = aws_glue_catalog_database. bdp_db . name
1014+ table_name = " unscaled_features"
7031015 type = " compaction"
7041016
7051017 configuration {
0 commit comments