From f869ae0562cd1d3ee6e7b8380b96744773ed6e67 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Fri, 29 May 2026 15:20:51 -0700 Subject: [PATCH 1/2] Remove lib/local-pcg-execution --- lib/CMakeLists.txt | 1 - lib/index.dox | 1 - lib/local-pcg-execution/CMakeLists.txt | 21 -- .../execute_tasks_for_parallel_layer.h | 59 ------ .../local_parallel_tensor_backing.dtg.toml | 24 --- .../local_parallel_tensor_backing.h | 41 ---- .../local_pcg_args_backing.dtg.toml | 21 -- .../local_pcg_args_backing.h | 20 -- .../local_pcg_training_backing.dtg.toml | 32 --- .../local_pcg_training_backing.h | 41 ---- .../mapped_per_device_op_states_group.h | 50 ----- .../mapped_runtime_task_group.h | 57 ------ .../parallel_forward_tensor_group.dtg.toml | 22 -- .../parallel_layer_instance_id_t.dtg.toml | 22 -- .../parallel_loss_tensor_group.dtg.toml | 22 -- .../parallel_tensor_accessors_w.dtg.toml | 14 -- ...runtime_atomic_task_shard_binding.dtg.toml | 25 --- .../runtime_atomic_task_shard_binding.h | 28 --- .../task_group_execution_times.dtg.toml | 27 --- .../training_operator_task_signature.dtg.toml | 34 --- ...ining_parallel_layer_plus_context.dtg.toml | 30 --- ...ining_parallel_tensor_shard_group.dtg.toml | 21 -- .../execute_tasks_for_parallel_layer.cc | 193 ------------------ .../local_parallel_tensor_backing.cc | 80 -------- .../local_pcg_args_backing.cc | 20 -- .../local_pcg_training_backing.cc | 45 ---- .../mapped_per_device_op_states_group.cc | 124 ----------- .../mapped_runtime_task_group.cc | 123 ----------- .../runtime_atomic_task_shard_binding.cc | 88 -------- lib/local-pcg-execution/test/CMakeLists.txt | 16 -- .../local_pcg_training_backing.cc | 10 - 31 files changed, 1312 deletions(-) delete mode 100644 lib/local-pcg-execution/CMakeLists.txt delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/execute_tasks_for_parallel_layer.h delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.h delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.h delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.h delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/mapped_per_device_op_states_group.h delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/mapped_runtime_task_group.h delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/parallel_forward_tensor_group.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/parallel_layer_instance_id_t.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/parallel_loss_tensor_group.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/parallel_tensor_accessors_w.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.h delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/task_group_execution_times.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/training_operator_task_signature.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/training_parallel_layer_plus_context.dtg.toml delete mode 100644 lib/local-pcg-execution/include/local-pcg-execution/training_parallel_tensor_shard_group.dtg.toml delete mode 100644 lib/local-pcg-execution/src/local-pcg-execution/execute_tasks_for_parallel_layer.cc delete mode 100644 lib/local-pcg-execution/src/local-pcg-execution/local_parallel_tensor_backing.cc delete mode 100644 lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc delete mode 100644 lib/local-pcg-execution/src/local-pcg-execution/local_pcg_training_backing.cc delete mode 100644 lib/local-pcg-execution/src/local-pcg-execution/mapped_per_device_op_states_group.cc delete mode 100644 lib/local-pcg-execution/src/local-pcg-execution/mapped_runtime_task_group.cc delete mode 100644 lib/local-pcg-execution/src/local-pcg-execution/runtime_atomic_task_shard_binding.cc delete mode 100644 lib/local-pcg-execution/test/CMakeLists.txt delete mode 100644 lib/local-pcg-execution/test/src/local-pcg-execution/local_pcg_training_backing.cc diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index cb3bd6d6ae..ef8e73314d 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -4,7 +4,6 @@ add_subdirectory(runtime) add_subdirectory(op-attrs) add_subdirectory(kernels) add_subdirectory(local-execution) -add_subdirectory(local-pcg-execution) add_subdirectory(realm-execution) add_subdirectory(task-spec) add_subdirectory(utils) diff --git a/lib/index.dox b/lib/index.dox index 69c52ae378..a2e0bcb42d 100644 --- a/lib/index.dox +++ b/lib/index.dox @@ -65,7 +65,6 @@ where solid arrows represent link-time dependencies and dashed arrows represent \section lib-deprecated-components Deprecated Components -- \c "local-pcg-execution": - \c "ffi": - \c "substitution-generator": - \c "runtime": Out-of-date code migrated from the old %FlexFlow codebase. Currently kept around for reference, but will eventually be removed. diff --git a/lib/local-pcg-execution/CMakeLists.txt b/lib/local-pcg-execution/CMakeLists.txt deleted file mode 100644 index 5fadff777b..0000000000 --- a/lib/local-pcg-execution/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -ff_add_library( - NAME - local-pcg-execution - SRC_PATTERNS - src/*.cc - PUBLIC_INCLUDE - include/ - PRIVATE_INCLUDE - src/ - DEPS - op-attrs - utils - kernels - task-spec - local-execution - pcg - spdlog - compiler -) - -add_subdirectory(test) diff --git a/lib/local-pcg-execution/include/local-pcg-execution/execute_tasks_for_parallel_layer.h b/lib/local-pcg-execution/include/local-pcg-execution/execute_tasks_for_parallel_layer.h deleted file mode 100644 index e6c5945c77..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/execute_tasks_for_parallel_layer.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_EXECUTE_TASKS_FOR_PARALLEL_LAYER_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_EXECUTE_TASKS_FOR_PARALLEL_LAYER_H - -#include "compiler/mapped_operator_task_group.h" -#include "local-execution/local_atomic_tensor_backing.dtg.h" -#include "local-execution/local_ready_to_launch_task.dtg.h" -#include "local-execution/local_task_registry.dtg.h" -#include "local-pcg-execution/local_parallel_tensor_backing.dtg.h" -#include "local-pcg-execution/mapped_per_device_op_states_group.h" -#include "local-pcg-execution/mapped_runtime_task_group.h" -#include "local-pcg-execution/task_group_execution_times.dtg.h" -#include "task-spec/runtime_task_invocation/runtime_arg_config.dtg.h" -#include "task-spec/runtime_task_invocation/runtime_task_invocation.dtg.h" -#include "task-spec/symbolic/training_symbolic_computation_graph.dtg.h" - -namespace FlexFlow { - -std::unordered_map - prepare_parallel_runtime_task_invocations( - RuntimeTaskInvocation const &, - LocalParallelTensorBacking const &, - LocalAtomicTensorBacking const &, - Allocator &, - RuntimeArgConfig const &, - MappedRuntimeTaskGroup const &); - -std::optional - execute_init_for_parallel_layer(symbolic_layer_guid_t, - TrainingSymbolicComputationGraph const &, - LocalParallelTensorBacking const &, - LocalAtomicTensorBacking const &, - Allocator &, - LocalTaskRegistry const &, - RuntimeArgConfig const &, - MappedRuntimeTaskGroup const &); - -std::optional - execute_forward_for_parallel_layer(symbolic_layer_guid_t, - TrainingSymbolicComputationGraph const &, - LocalParallelTensorBacking const &, - LocalAtomicTensorBacking const &, - Allocator &, - LocalTaskRegistry const &, - RuntimeArgConfig const &, - MappedRuntimeTaskGroup const &); - -std::optional - execute_forward_for_parallel_layer(symbolic_layer_guid_t, - TrainingSymbolicComputationGraph const &, - LocalParallelTensorBacking const &, - LocalAtomicTensorBacking const &, - Allocator &, - LocalTaskRegistry const &, - RuntimeArgConfig const &, - MappedRuntimeTaskGroup const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.dtg.toml deleted file mode 100644 index 257e5ad4c0..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.dtg.toml +++ /dev/null @@ -1,24 +0,0 @@ -namespace = "FlexFlow" -name = "LocalParallelTensorBacking" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "fmt", -] - -includes = [ - "task-spec/symbolic_training_tensor_guid_t.dtg.h", - "local-pcg-execution/training_parallel_tensor_shard_group.dtg.h", -] - -src_includes = [ - "utils/fmt/unordered_map.h", - "utils/hash/unordered_map.h", - "utils/ord/unordered_map.h", -] - -[[fields]] -name = "parallel_tensor_map" -type = "std::unordered_map<::FlexFlow::symbolic_training_tensor_guid_t, ::FlexFlow::TrainingParallelTensorShardGroup>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.h b/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.h deleted file mode 100644 index 0af2502dc7..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/local_parallel_tensor_backing.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PARALLEL_TENSOR_BACKING_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PARALLEL_TENSOR_BACKING_H - -#include "kernels/allocation.h" -#include "local-execution/atomic_task_invocation.dtg.h" -#include "local-execution/tensor_slot_backing.dtg.h" -#include "local-pcg-execution/local_parallel_tensor_backing.dtg.h" -#include "local-pcg-execution/mapped_runtime_task_group.h" -#include "local-pcg-execution/parallel_tensor_accessors_w.dtg.h" -#include "op-attrs/parallel_tensor_shape.dtg.h" -#include "pcg/machine_space_coordinate.dtg.h" -#include "task-spec/runtime_task_invocation/runtime_arg_config.dtg.h" -#include "task-spec/runtime_task_invocation/runtime_task_invocation.dtg.h" -#include "task-spec/task_argument_accessor/task_tensor_parameter.dtg.h" - -namespace FlexFlow { - -std::unordered_map - lower_parallel_runtime_task_invocation_to_atomic_task_invocation_group( - LocalParallelTensorBacking const &, - RuntimeTaskInvocation const &, - RuntimeArgConfig const &, - MappedRuntimeTaskGroup const &); - -AtomicTaskInvocation - lower_parallel_runtime_task_invocation_to_atomic_task_invocation( - LocalParallelTensorBacking const &, - RuntimeTaskInvocation const &, - RuntimeArgConfig const &, - MachineSpaceCoordinate const &, - RuntimeAtomicTaskShardBinding const &); - -// LocalParallelTensorBacking construct_local_parallel_tensor_backing( -// std::unordered_map -// const &training_ptensor_shapes, -// std::unordered_map const &preallocated_ptensors, Allocator &); - -} // namespace FlexFlow - -#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.dtg.toml deleted file mode 100644 index ad332327d8..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.dtg.toml +++ /dev/null @@ -1,21 +0,0 @@ -namespace = "FlexFlow" -name = "LocalPcgArgsBacking" -type = "struct" -features = [] - -includes = [ - "task-spec/runtime_task_invocation/runtime_arg_config.dtg.h", - "task-spec/device_specific_device_states.dtg.h", - "local-pcg-execution/parallel_layer_instance_id_t.dtg.h", - "", - "", - "local-pcg-execution/mapped_per_device_op_states_group.h", -] - -[[fields]] -name = "runtime_arg_config" -type = "::FlexFlow::RuntimeArgConfig" - -[[fields]] -name = "per_device_op_states" -type = "std::unordered_map<::FlexFlow::symbolic_layer_guid_t, std::optional<::FlexFlow::MappedPerDeviceOpStatesGroup>>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.h b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.h deleted file mode 100644 index d755760ce6..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_args_backing.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_ARGS_BACKING_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_ARGS_BACKING_H - -#include "local-pcg-execution/local_pcg_args_backing.dtg.h" -#include "pcg/machine_space_coordinate.dtg.h" -#include "task-spec/device_specific_per_device_op_state.dtg.h" -#include "task-spec/symbolic/symbolic_layer_guid_t.dtg.h" -#include -#include - -namespace FlexFlow { - -std::unordered_map> - get_op_states_for_machine_space_coord(LocalPcgArgsBacking const &, - MachineSpaceCoordinate const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.dtg.toml deleted file mode 100644 index 21b5afde73..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.dtg.toml +++ /dev/null @@ -1,32 +0,0 @@ -namespace = "FlexFlow" -name = "LocalPcgTrainingBacking" -type = "struct" -features = [] - -includes = [ - "task-spec/training_parallel_computation_graph.dtg.h", - "local-execution/local_task_registry.dtg.h", - "local-pcg-execution/local_parallel_tensor_backing.dtg.h", - "local-pcg-execution/local_pcg_args_backing.dtg.h", - "pcg/machine_compute_specification.dtg.h", -] - -[[fields]] -name = "training_pcg" -type = "::FlexFlow::TrainingParallelComputationGraph" - -[[fields]] -name = "local_task_registry" -type = "::FlexFlow::LocalTaskRegistry" - -[[fields]] -name = "local_parallel_tensor_backing" -type = "::FlexFlow::LocalParallelTensorBacking" - -[[fields]] -name = "local_parallel_args_backing" -type = "::FlexFlow::LocalPcgArgsBacking" - -[[fields]] -name = "machine_compute_specification" -type = "::FlexFlow::MachineComputeSpecification" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.h b/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.h deleted file mode 100644 index dc4b1ad350..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/local_pcg_training_backing.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_TRAINING_BACKING_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_LOCAL_PCG_TRAINING_BACKING_H - -#include "local-pcg-execution/local_pcg_training_backing.dtg.h" -#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" -#include "pcg/optimizer_attrs.dtg.h" -#include "task-spec/training_parallel_layer_plus_context.dtg.h" -#include "utils/units/milliseconds_t.h" - -namespace FlexFlow { - -LocalPcgTrainingBacking make_local_pcg_training_backing_for_pcg( - Allocator &allocator, - std::unordered_map const &preallocated_tensors, - TrainingParallelComputationGraph const &training_pcg, - RuntimeArgConfig const &runtime_arg_config, - OptimizerAttrs const &optimizer_attrs, - MachineComputeSpecification const &machine_compute_specification); - -std::optional> - execute_forward(LocalTaskRegistry const &, - LocalParallelTensorBacking const &, - LocalPcgArgsBacking const &, - TrainingParallelLayerPlusContext const &, - Allocator &); - -std::optional> execute_backward(); - -void compute_loss(LocalPcgTrainingBacking const &, - LossAttrs const &, - Allocator &); - -void execute_update(LocalPcgTrainingBacking const &, - parallel_layer_guid_t const &, - OptimizerAttrs const &, - Allocator &); - -} // namespace FlexFlow - -#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/mapped_per_device_op_states_group.h b/lib/local-pcg-execution/include/local-pcg-execution/mapped_per_device_op_states_group.h deleted file mode 100644 index da4a954d93..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/mapped_per_device_op_states_group.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_PER_DEVICE_OP_STATES_GROUP_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_PER_DEVICE_OP_STATES_GROUP_H - -#include "compiler/mapped_operator_task_group.h" -#include "pcg/machine_space_coordinate.dtg.h" -#include "task-spec/device_specific_per_device_op_state.dtg.h" -#include "utils/bidict/bidict.h" - -namespace FlexFlow { - -struct MappedPerDeviceOpStatesGroup { - MappedPerDeviceOpStatesGroup() = delete; - - explicit MappedPerDeviceOpStatesGroup( - std::unordered_map const - &per_device_op_states); - - [[nodiscard]] bool operator==(MappedPerDeviceOpStatesGroup const &) const; - [[nodiscard]] bool operator!=(MappedPerDeviceOpStatesGroup const &) const; - - [[nodiscard]] std::unordered_map const & - get_per_device_op_states() const; - -private: - std::unordered_map - shard_bindings; - -private: - [[nodiscard]] std::tuple tie() const; - - friend struct ::std::hash; -}; - -std::string format_as(::FlexFlow::MappedPerDeviceOpStatesGroup const &); -std::ostream &operator<<(std::ostream &, - ::FlexFlow::MappedPerDeviceOpStatesGroup const &); - -} // namespace FlexFlow - -namespace std { - -template <> -struct hash<::FlexFlow::MappedPerDeviceOpStatesGroup> { - size_t operator()(::FlexFlow::MappedPerDeviceOpStatesGroup const &) const; -}; - -} // namespace std -#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/mapped_runtime_task_group.h b/lib/local-pcg-execution/include/local-pcg-execution/mapped_runtime_task_group.h deleted file mode 100644 index 550da0cafc..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/mapped_runtime_task_group.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_RUNTIME_TASK_GROUP_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_MAPPED_RUNTIME_TASK_GROUP_H - -#include "compiler/mapped_operator_task_group.h" -#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" -#include "pcg/machine_space_coordinate.dtg.h" -#include "task-spec/fwb_op_task_type.dtg.h" -#include "task-spec/symbolic/symbolic_layer_training_tensor_group_signature.dtg.h" -#include "utils/bidict/bidict.h" - -namespace FlexFlow { - -struct MappedRuntimeTaskGroup { - MappedRuntimeTaskGroup() = delete; - - explicit MappedRuntimeTaskGroup( - bidict const - &shard_bindings); - - [[nodiscard]] bool operator==(MappedRuntimeTaskGroup const &) const; - [[nodiscard]] bool operator!=(MappedRuntimeTaskGroup const &) const; - - [[nodiscard]] bidict const & - get_shard_bindings() const; - -private: - bidict shard_bindings; - -private: - [[nodiscard]] std::tuple tie() const; - - friend struct ::std::hash; -}; - -std::string format_as(::FlexFlow::MappedRuntimeTaskGroup const &); -std::ostream &operator<<(std::ostream &, - ::FlexFlow::MappedRuntimeTaskGroup const &); - -MappedRuntimeTaskGroup - lower_mapped_operator_task_group_to_mapped_runtime_task_group( - MappedOperatorTaskGroup const &, - SymbolicLayerTrainingTensorGroupSignature const &, - FwbOpTaskType); - -} // namespace FlexFlow - -namespace std { - -template <> -struct hash<::FlexFlow::MappedRuntimeTaskGroup> { - size_t operator()(::FlexFlow::MappedRuntimeTaskGroup const &) const; -}; - -} // namespace std - -#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_forward_tensor_group.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_forward_tensor_group.dtg.toml deleted file mode 100644 index d20d046c50..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/parallel_forward_tensor_group.dtg.toml +++ /dev/null @@ -1,22 +0,0 @@ -namespace = "FlexFlow" -name = "ParallelForwardTensorGroup" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "json", - "fmt", - "rapidcheck", -] - -includes = [ - "task-spec/forward_tensor_guid_t.dtg.h", - "op-attrs/parallel_tensor_space_coordinate.dtg.h", - "utils/bidict/bidict.h", -] - -[[fields]] -name = "forward_training_tensors_by_coord" -type = "::FlexFlow::bidict<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::forward_tensor_guid_t>" - diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_layer_instance_id_t.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_layer_instance_id_t.dtg.toml deleted file mode 100644 index dcbc9d97ee..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/parallel_layer_instance_id_t.dtg.toml +++ /dev/null @@ -1,22 +0,0 @@ -namespace = "FlexFlow" -name = "parallel_layer_instance_id_t" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "fmt", -] - -includes = [ - "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h", - "pcg/gpu_id_t.dtg.h", -] - -[[fields]] -name = "parallel_layer_guid" -type = "::FlexFlow::parallel_layer_guid_t" - -[[fields]] -name = "gpu_id" -type = "::FlexFlow::gpu_id_t" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_loss_tensor_group.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_loss_tensor_group.dtg.toml deleted file mode 100644 index 6a2e2619b1..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/parallel_loss_tensor_group.dtg.toml +++ /dev/null @@ -1,22 +0,0 @@ -namespace = "FlexFlow" -name = "ParallelLossTensorGroup" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "json", - "fmt", - "rapidcheck", -] - -includes = [ - "task-spec/loss_tensor_guid_t.dtg.h", - "op-attrs/parallel_tensor_space_coordinate.dtg.h", - "utils/bidict/bidict.h", -] - -[[fields]] -name = "loss_training_tensors_by_coord" -type = "::FlexFlow::bidict<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::loss_tensor_guid_t>" - diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_tensor_accessors_w.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/parallel_tensor_accessors_w.dtg.toml deleted file mode 100644 index d75dda9f68..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/parallel_tensor_accessors_w.dtg.toml +++ /dev/null @@ -1,14 +0,0 @@ -namespace = "FlexFlow" -name = "ParallelTensorAccessorsW" -type = "struct" -features = [] - -includes = [ - "", - "op-attrs/parallel_tensor_space_coordinate.dtg.h", - "kernels/accessor.h", -] - -[[fields]] -name = "shard_map" -type = "std::unordered_map<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::GenericTensorAccessorW>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.dtg.toml deleted file mode 100644 index 6ec06d4d64..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.dtg.toml +++ /dev/null @@ -1,25 +0,0 @@ -namespace = "FlexFlow" -name = "RuntimeAtomicTaskShardBinding" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "fmt", -] - -includes = [ - "op-attrs/parallel_tensor_space_coordinate.dtg.h", - "", - "task-spec/symbolic_training_tensor_guid_t.dtg.h", -] - -src_includes = [ - "utils/hash/unordered_map.h", - "utils/fmt/unordered_map.h", - "utils/ord/unordered_map.h", -] - -[[fields]] -name = "raw_binding" -type = "std::unordered_map<::FlexFlow::symbolic_training_tensor_guid_t, ::FlexFlow::ParallelTensorSpaceCoordinate>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.h b/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.h deleted file mode 100644 index 49631fd94b..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/runtime_atomic_task_shard_binding.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_RUNTIME_ATOMIC_TASK_SHARD_BINDING_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_RUNTIME_ATOMIC_TASK_SHARD_BINDING_H - -#include "compiler/operator_atomic_task_shard_binding.dtg.h" -#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" -#include "task-spec/fwb_op_task_type.dtg.h" -#include "task-spec/symbolic/symbolic_layer_training_tensor_group_signature.dtg.h" - -namespace FlexFlow { - -RuntimeAtomicTaskShardBinding - lower_op_shard_binding_to_fwd_pass_runtime_shard_binding( - OperatorAtomicTaskShardBinding const &, - SymbolicLayerTrainingTensorGroupSignature const &); - -RuntimeAtomicTaskShardBinding - lower_op_shard_binding_to_bwd_pass_runtime_shard_binding( - OperatorAtomicTaskShardBinding const &, - SymbolicLayerTrainingTensorGroupSignature const &); - -RuntimeAtomicTaskShardBinding lower_op_shard_binding_to_runtime_shard_binding( - OperatorAtomicTaskShardBinding const &, - SymbolicLayerTrainingTensorGroupSignature const &, - FwbOpTaskType); - -} // namespace FlexFlow - -#endif diff --git a/lib/local-pcg-execution/include/local-pcg-execution/task_group_execution_times.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/task_group_execution_times.dtg.toml deleted file mode 100644 index 52fa1cbc00..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/task_group_execution_times.dtg.toml +++ /dev/null @@ -1,27 +0,0 @@ -namespace = "FlexFlow" -name = "TaskGroupExecutionTimes" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "json", - "rapidcheck", - "fmt", -] - -includes = [ - "", - "utils/units/milliseconds_t.h", - "pcg/machine_space_coordinate.dtg.h", -] - -src_includes = [ - "utils/hash/unordered_map.h", - "utils/fmt/unordered_map.h", - "utils/ord/unordered_map.h", -] - -[[fields]] -name = "execution_times" -type = "std::unordered_map<::FlexFlow::MachineSpaceCoordinate, ::FlexFlow::milliseconds_t>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/training_operator_task_signature.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/training_operator_task_signature.dtg.toml deleted file mode 100644 index fc8f54715b..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/training_operator_task_signature.dtg.toml +++ /dev/null @@ -1,34 +0,0 @@ -namespace = "FlexFlow" -name = "TrainingOperatorTaskSignature" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "json", - "fmt", - "rapidcheck", -] - -includes = [ - "task-spec/training_tensor_guid_t.dtg.h", - "", -] - -src_includes = [ - "utils/hash/vector.h", - "utils/fmt/vector.h", - "utils/ord/vector.h", -] - -[[fields]] -name = "inputs" -type = "std::vector<::FlexFlow::training_tensor_guid_t>" - -[[fields]] -name = "weights" -type = "std::vector<::FlexFlow::training_tensor_guid_t>" - -[[fields]] -name = "outputs" -type = "std::vector<::FlexFlow::training_tensor_guid_t>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_layer_plus_context.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_layer_plus_context.dtg.toml deleted file mode 100644 index fd4bbc6182..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_layer_plus_context.dtg.toml +++ /dev/null @@ -1,30 +0,0 @@ -namespace = "FlexFlow" -name = "TrainingParallelLayerPlusContext" -type = "struct" -features = [] - -includes = [ - "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h", - "pcg/parallel_computation_graph/parallel_layer_attrs.dtg.h", - "task-spec/training_parallel_tensor_group_with_attrs.dtg.h", -] - -[[fields]] -name = "parallel_layer_guid" -type = "::FlexFlow::parallel_layer_guid_t" - -[[fields]] -name = "parallel_layer_attrs" -type = "::FlexFlow::ParallelLayerAttrs" - -[[fields]] -name = "input_parallel_tensor_groups" -type = "std::vector<::FlexFlow::TrainingParallelTensorGroupWithAttrs>" - -[[fields]] -name = "weight_tensor_groups" -type = "std::vector<::FlexFlow::TrainingParallelTensorGroupWithAttrs>" - -[[fields]] -name = "output_tensor_groups" -type = "std::vector<::FlexFlow::TrainingParallelTensorGroupWithAttrs>" diff --git a/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_tensor_shard_group.dtg.toml b/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_tensor_shard_group.dtg.toml deleted file mode 100644 index e3958cf934..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/training_parallel_tensor_shard_group.dtg.toml +++ /dev/null @@ -1,21 +0,0 @@ -namespace = "FlexFlow" -name = "TrainingParallelTensorShardGroup" -type = "struct" -features = [ - "eq", - "ord", - "hash", - "json", - "fmt", - "rapidcheck", -] - -includes = [ - "", - "op-attrs/parallel_tensor_space_coordinate.dtg.h", - "local-execution/atomic_training_tensor_guid_t.dtg.h", -] - -[[fields]] -name = "shard_map" -type = "std::unordered_map<::FlexFlow::ParallelTensorSpaceCoordinate, ::FlexFlow::atomic_training_tensor_guid_t>" diff --git a/lib/local-pcg-execution/src/local-pcg-execution/execute_tasks_for_parallel_layer.cc b/lib/local-pcg-execution/src/local-pcg-execution/execute_tasks_for_parallel_layer.cc deleted file mode 100644 index fc4562ff64..0000000000 --- a/lib/local-pcg-execution/src/local-pcg-execution/execute_tasks_for_parallel_layer.cc +++ /dev/null @@ -1,193 +0,0 @@ -#include "local-pcg-execution/execute_tasks_for_parallel_layer.h" -#include "local-execution/local_atomic_tensor_backing.h" -#include "local-execution/local_task_registry.h" -#include "local-pcg-execution/local_parallel_tensor_backing.h" -#include "local-pcg-execution/task_group_execution_times.dtg.h" -#include "task-spec/fwb_op_task_type.h" -#include "utils/containers/all_of.h" -#include "utils/containers/flatmap.h" -#include "utils/containers/lift_optional_through_map.h" -#include "utils/containers/map_values.h" -#include "utils/containers/values.h" - -namespace FlexFlow { - -std::unordered_map - prepare_parallel_runtime_task_invocations( - RuntimeTaskInvocation const &runtime_task_invocation, - LocalParallelTensorBacking const ¶llel_tensor_backing, - LocalAtomicTensorBacking const &atomic_tensor_backing, - Allocator &allocator, - RuntimeArgConfig const &runtime_arg_config, - MappedRuntimeTaskGroup const &task_group) { - - std::unordered_map - atomic_task_invocations = - lower_parallel_runtime_task_invocation_to_atomic_task_invocation_group( - parallel_tensor_backing, - runtime_task_invocation, - runtime_arg_config, - task_group); - - return map_values( - atomic_task_invocations, - [&](AtomicTaskInvocation const &atomic_task_invocation) - -> LocalReadyToLaunchTask { - TaskArgumentAccessor task_arg_accessor = - get_task_arg_accessor_for_atomic_task_invocation( - atomic_tensor_backing, atomic_task_invocation, allocator); - - return LocalReadyToLaunchTask{ - atomic_task_invocation.task_id, - task_arg_accessor, - }; - }); -} - -std::optional execute_init_for_parallel_layer( - symbolic_layer_guid_t symbolic_layer_guid, - TrainingSymbolicComputationGraph const &g, - LocalParallelTensorBacking const ¶llel_tensor_backing, - LocalAtomicTensorBacking const &atomic_tensor_backing, - Allocator &allocator, - LocalTaskRegistry const &task_registry, - RuntimeArgConfig const &runtime_arg_config, - MappedRuntimeTaskGroup const &task_group) { - - SymbolicCgOpAttrsAndTrainingSignatureWithShapes attrs_and_signature = - get_attrs_and_signature_for_layer(g, symbolic_layer_guid); - - RuntimeTaskInvocation runtime_task_invocation = ({ - std::optional maybe_runtime_task_invocation = - get_init_runtime_task_invocation_for_layer(symbolic_layer_guid, - attrs_and_signature); - if (!maybe_runtime_task_invocation.has_value()) { - return std::nullopt; - } - maybe_runtime_task_invocation.value(); - }); - - std::unordered_map - prepared_tasks = - prepare_parallel_runtime_task_invocations(runtime_task_invocation, - parallel_tensor_backing, - atomic_tensor_backing, - allocator, - runtime_arg_config, - task_group); - - std::unordered_map> - op_state_by_shard = map_values( - prepared_tasks, - [&](LocalReadyToLaunchTask const &prepared_task) - -> std::optional { - return call_init_task_impl(task_registry, - prepared_task.task_id, - prepared_task.task_arg_accessor); - }); - - return transform( - lift_optional_through_map(op_state_by_shard), - [](std::unordered_map const &m) { - return MappedPerDeviceOpStatesGroup{m}; - }); -} - -static std::optional execute_fwb_for_parallel_layer( - symbolic_layer_guid_t symbolic_layer_guid, - TrainingSymbolicComputationGraph const &g, - LocalParallelTensorBacking const ¶llel_tensor_backing, - LocalAtomicTensorBacking const &atomic_tensor_backing, - Allocator &allocator, - LocalTaskRegistry const &task_registry, - RuntimeArgConfig const &runtime_arg_config, - MappedRuntimeTaskGroup const &task_group, - FwbOpTaskType fwb_task_type) { - - SymbolicCgOpAttrsAndTrainingSignatureWithShapes attrs_and_signature = - get_attrs_and_signature_for_layer(g, symbolic_layer_guid); - - OpTaskType op_task_type = - assert_unwrap(op_task_type_from_fwb_op_task_type(fwb_task_type)); - - RuntimeTaskInvocation runtime_task_invocation = ({ - std::optional maybe_runtime_task_invocation = - get_runtime_task_invocation_for_layer_and_type( - symbolic_layer_guid, attrs_and_signature, op_task_type); - if (!maybe_runtime_task_invocation.has_value()) { - return std::nullopt; - } - maybe_runtime_task_invocation.value(); - }); - - std::unordered_map - prepared_tasks = - prepare_parallel_runtime_task_invocations(runtime_task_invocation, - parallel_tensor_backing, - atomic_tensor_backing, - allocator, - runtime_arg_config, - task_group); - - std::unordered_map> - timing_by_shard = map_values( - prepared_tasks, - [&](LocalReadyToLaunchTask const &prepared_task) - -> std::optional { - return call_fwb_task_impl(task_registry, - prepared_task.task_id, - prepared_task.task_arg_accessor); - }); - - return transform( - lift_optional_through_map(timing_by_shard), - [](std::unordered_map const &m) { - return TaskGroupExecutionTimes{m}; - }); -} - -std::optional execute_forward_for_parallel_layer( - symbolic_layer_guid_t symbolic_layer_guid, - TrainingSymbolicComputationGraph const &g, - LocalParallelTensorBacking const ¶llel_tensor_backing, - LocalAtomicTensorBacking const &atomic_tensor_backing, - Allocator &allocator, - LocalTaskRegistry const &task_registry, - RuntimeArgConfig const &runtime_arg_config, - MappedRuntimeTaskGroup const &task_group) { - - return execute_fwb_for_parallel_layer(symbolic_layer_guid, - g, - parallel_tensor_backing, - atomic_tensor_backing, - allocator, - task_registry, - runtime_arg_config, - task_group, - FwbOpTaskType::FWD); -} - -std::optional execute_backward_for_parallel_layer( - symbolic_layer_guid_t symbolic_layer_guid, - TrainingSymbolicComputationGraph const &g, - LocalParallelTensorBacking const ¶llel_tensor_backing, - LocalAtomicTensorBacking const &atomic_tensor_backing, - Allocator &allocator, - LocalTaskRegistry const &task_registry, - RuntimeArgConfig const &runtime_arg_config, - MappedRuntimeTaskGroup const &task_group) { - - return execute_fwb_for_parallel_layer(symbolic_layer_guid, - g, - parallel_tensor_backing, - atomic_tensor_backing, - allocator, - task_registry, - runtime_arg_config, - task_group, - FwbOpTaskType::BWD); -} - -} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/local_parallel_tensor_backing.cc b/lib/local-pcg-execution/src/local-pcg-execution/local_parallel_tensor_backing.cc deleted file mode 100644 index ead5349a9f..0000000000 --- a/lib/local-pcg-execution/src/local-pcg-execution/local_parallel_tensor_backing.cc +++ /dev/null @@ -1,80 +0,0 @@ -#include "local-pcg-execution/local_parallel_tensor_backing.h" -#include "local-pcg-execution/local_pcg_args_backing.dtg.h" -#include "local-pcg-execution/local_pcg_args_backing.h" -#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" -#include "task-spec/device_specific_per_device_op_state.dtg.h" -#include "task-spec/lower_op_task_invocation_to_runtime_task_invocation.h" -#include "utils/containers/map_values.h" -#include "utils/containers/map_values2.h" -#include "utils/containers/try_at.h" - -namespace FlexFlow { - -std::unordered_map - lower_parallel_runtime_task_invocation_to_atomic_task_invocation_group( - LocalParallelTensorBacking const ¶llel_tensor_backing, - LocalPcgArgsBacking const ¶llel_args_backing, - RuntimeTaskInvocation const &runtime_task_invocation, - MappedRuntimeTaskGroup const &runtime_task_group) { - - std::unordered_map - shard_bindings = - runtime_task_group.get_shard_bindings().as_unordered_map(); - - return map_values2( - shard_bindings, - [&](MachineSpaceCoordinate const &machine_space_coord, - RuntimeAtomicTaskShardBinding const &shard_binding) - -> AtomicTaskInvocation { - return lower_parallel_runtime_task_invocation_to_atomic_task_invocation( - parallel_tensor_backing, - runtime_task_invocation, - parallel_args_backing.runtime_arg_config, - get_op_states_for_machine_space_coord(parallel_args_backing, - machine_space_coord), - machine_space_coord, - shard_binding); - }); -} - -AtomicTaskInvocation - lower_parallel_runtime_task_invocation_to_atomic_task_invocation( - LocalParallelTensorBacking const ¶llel_tensor_backing, - RuntimeTaskInvocation const &invocation, - RuntimeArgConfig const &runtime_arg_config, - std::unordered_map> const - &per_device_op_states, - MachineSpaceCoordinate const &machine_space_coord, - RuntimeAtomicTaskShardBinding const &shard_binding) { - - std::unordered_map - tensor_bindings = - map_values(invocation.binding.get_tensor_bindings(), - [&](symbolic_training_tensor_guid_t t) - -> atomic_training_tensor_guid_t { - return parallel_tensor_backing.parallel_tensor_map.at(t); - }); - - auto get_op_state_for_layer = [&](symbolic_layer_guid_t l) - -> std::optional { - return per_device_op_states.at(l); - }; - - std::unordered_map arg_bindings = - map_values(invocation.binding.get_arg_bindings(), - [&](RuntimeArgSpec const &arg_spec) -> ConcreteArgSpec { - return lower_runtime_arg_ref_spec_to_concrete_arg_spec( - arg_spec, runtime_arg_config, get_op_state_for_layer); - }); - - return AtomicTaskInvocation{ - invocation.task_id, - AtomicTaskBinding{ - tensor_bindings, - arg_bindings, - }, - }; -} - -} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc b/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc deleted file mode 100644 index 2910683801..0000000000 --- a/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_args_backing.cc +++ /dev/null @@ -1,20 +0,0 @@ -#include "local-pcg-execution/local_pcg_args_backing.h" - -namespace FlexFlow { - -std::unordered_map> - get_op_states_for_machine_space_coord( - LocalPcgArgsBacking const &args_backing, - MachineSpaceCoordinate const &coord) { - - return map_values( - args_backing.per_device_op_states, - [&](std::optional const &m_g) { - return transform(m_g, [&](MappedPerDeviceOpStatesGroup const &g) { - return g.get_per_device_op_states().at_l(coord); - }); - }); -} - -} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_training_backing.cc b/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_training_backing.cc deleted file mode 100644 index d9649d9e85..0000000000 --- a/lib/local-pcg-execution/src/local-pcg-execution/local_pcg_training_backing.cc +++ /dev/null @@ -1,45 +0,0 @@ -#include "local-pcg-execution/local_pcg_training_backing.h" -#include "local-execution/local_task_registry.h" - -namespace FlexFlow { - -LocalPcgTrainingBacking make_local_pcg_training_backing_for_pcg( - Allocator &allocator, - std::unordered_map const &preallocated_tensors, - TrainingParallelComputationGraph const &training_pcg, - RuntimeArgConfig const &runtime_arg_config, - OptimizerAttrs const &optimizer_attrs, - MachineComputeSpecification const &machine_compute_specification) { - - NOT_IMPLEMENTED(); -} - -std::optional> execute_forward( - LocalTaskRegistry const &local_task_registry, - LocalParallelTensorBacking const &, - LocalPcgArgsBacking const &, - TrainingParallelLayerPlusContext const &training_parallel_layer, - Allocator &) { - - NOT_IMPLEMENTED(); -} - -std::optional> execute_backward() { - NOT_IMPLEMENTED(); -} - -void compute_loss(LocalPcgTrainingBacking const &, - LossAttrs const &, - Allocator &) { - NOT_IMPLEMENTED(); -} - -void execute_update(LocalPcgTrainingBacking const &, - parallel_layer_guid_t const &, - OptimizerAttrs const &, - Allocator &) { - NOT_IMPLEMENTED(); -} - -} // namespace FlexFlow diff --git a/lib/local-pcg-execution/src/local-pcg-execution/mapped_per_device_op_states_group.cc b/lib/local-pcg-execution/src/local-pcg-execution/mapped_per_device_op_states_group.cc deleted file mode 100644 index 363e918190..0000000000 --- a/lib/local-pcg-execution/src/local-pcg-execution/mapped_per_device_op_states_group.cc +++ /dev/null @@ -1,124 +0,0 @@ -#include "local-pcg-execution/mapped_per_device_op_states_group.h" -#include "compiler/machine_mapping/machine_view.h" -#include "op-attrs/get_operator_task_space.h" -#include "op-attrs/operator_task_space.h" -#include "op-attrs/parallel_tensor_space_coordinate.h" -#include "utils/bidict/generate_bidict.h" -#include "utils/containers/are_all_distinct.h" -#include "utils/containers/require_all_same.h" -#include "utils/containers/transform.h" -#include "utils/containers/vector_of.h" -#include "utils/hash/tuple.h" -#include "utils/nonnegative_int/num_elements.h" - -namespace FlexFlow { - -MappedPerDeviceOpStatesGroup::MappedPerDeviceOpStatesGroup( - bidict const - &per_device_op_states) - : per_device_op_states(per_device_op_states) { - auto check_arity = [&](TensorRole tensor_role) -> nonnegative_int { - std::unordered_set arities = transform( - shard_bindings.right_values(), - [&](OperatorAtomicTaskShardBinding const &s) -> nonnegative_int { - return num_elements(ptensor_space_coords_for_role(s, tensor_role)); - }); - - return require_all_same(arities).value_or(0_n); - }; - - nonnegative_int num_inputs = check_arity(TensorRole::INPUT); - nonnegative_int num_weights = check_arity(TensorRole::WEIGHT); - nonnegative_int num_outputs = check_arity(TensorRole::OUTPUT); - - std::unordered_set all_keys = - all_keys_for_signature_arities( - /*num_inputs=*/num_inputs, - /*num_weights=*/num_weights, - /*num_outputs=*/num_outputs); - - for (TaskSignatureTensorKey const &key : all_keys) { - std::vector signatures_for_key = - vector_of(shard_bindings.right_values()); - - std::vector coords_for_key = - transform(signatures_for_key, - [&](OperatorAtomicTaskShardBinding const &signature) { - return ptensor_space_coord_for_key(signature, key); - }); - - ASSERT(are_all_distinct(coords_for_key)); - - std::vector coord_dims_for_key = - transform(coords_for_key, [](ParallelTensorSpaceCoordinate const &c) { - return ptensor_coord_num_dims(c); - }); - - require_all_same(coord_dims_for_key); - } -} - -bool MappedPerDeviceOpStatesGroup::operator==( - MappedPerDeviceOpStatesGroup const &other) const { - return this->tie() == other.tie(); -} - -bool MappedPerDeviceOpStatesGroup::operator!=( - MappedPerDeviceOpStatesGroup const &other) const { - return this->tie() == other.tie(); -} - -std::tuple< - bidict const &> - MappedPerDeviceOpStatesGroup::tie() const { - - return std::tie(this->shard_bindings); -} - -bidict const & - MappedPerDeviceOpStatesGroup::get_per_device_op_states() const { - return this->shard_bindings; -} - -std::string format_as(::FlexFlow::MappedPerDeviceOpStatesGroup const &m) { - return fmt::format("", - m.get_shard_bindings()); -} - -std::ostream &operator<<(std::ostream &s, - ::FlexFlow::MappedPerDeviceOpStatesGroup const &x) { - return (s << fmt::to_string(x)); -} - -MappedPerDeviceOpStatesGroup mapped_operator_task_group_from_machine_view( - ComputationGraphOpAttrs const &op_attrs, - std::unordered_map const - &inputs_dim_degrees, - MachineView const &machine_view) { - - OperatorTaskSpace op_task_space = - get_operator_task_space(op_attrs, inputs_dim_degrees); - - return MappedPerDeviceOpStatesGroup{ - generate_bidict( - get_machine_space_coordinates(op_task_space, machine_view), - [&](MachineSpaceCoordinate const &machine_space_coord) { - return operator_atomic_task_shard_binding_from_machine_view( - op_attrs, - inputs_dim_degrees, - machine_view, - machine_space_coord); - }), - }; -} - -} // namespace FlexFlow - -namespace std { - -size_t hash<::FlexFlow::MappedPerDeviceOpStatesGroup>::operator()( - ::FlexFlow::MappedPerDeviceOpStatesGroup const &x) const { - return ::FlexFlow::get_std_hash(x.tie()); -} - -} // namespace std diff --git a/lib/local-pcg-execution/src/local-pcg-execution/mapped_runtime_task_group.cc b/lib/local-pcg-execution/src/local-pcg-execution/mapped_runtime_task_group.cc deleted file mode 100644 index f374412296..0000000000 --- a/lib/local-pcg-execution/src/local-pcg-execution/mapped_runtime_task_group.cc +++ /dev/null @@ -1,123 +0,0 @@ -#include "local-pcg-execution/mapped_runtime_task_group.h" -#include "compiler/machine_mapping/machine_view.h" -#include "compiler/operator_atomic_task_shard_binding.h" -#include "local-pcg-execution/runtime_atomic_task_shard_binding.dtg.h" -#include "local-pcg-execution/runtime_atomic_task_shard_binding.h" -#include "op-attrs/get_operator_task_space.h" -#include "op-attrs/operator_task_space.h" -#include "op-attrs/parallel_tensor_space_coordinate.h" -#include "utils/bidict/algorithms/transform_values.h" -#include "utils/bidict/generate_bidict.h" -#include "utils/containers/are_all_distinct.h" -#include "utils/containers/require_all_same.h" -#include "utils/containers/transform.h" -#include "utils/containers/vector_of.h" -#include "utils/hash/tuple.h" -#include "utils/nonnegative_int/num_elements.h" - -namespace FlexFlow { - -MappedRuntimeTaskGroup::MappedRuntimeTaskGroup( - bidict const - &shard_bindings) - : shard_bindings(shard_bindings) { - auto check_arity = [&](TensorRole tensor_role) -> nonnegative_int { - std::unordered_set arities = transform( - shard_bindings.right_values(), - [&](RuntimeAtomicTaskShardBinding const &s) -> nonnegative_int { - return num_elements(ptensor_space_coords_for_role(s, tensor_role)); - }); - - return require_all_same(arities).value_or(0_n); - }; - - nonnegative_int num_inputs = check_arity(TensorRole::INPUT); - nonnegative_int num_weights = check_arity(TensorRole::WEIGHT); - nonnegative_int num_outputs = check_arity(TensorRole::OUTPUT); - - std::unordered_set all_keys = - all_keys_for_signature_arities( - /*num_inputs=*/num_inputs, - /*num_weights=*/num_weights, - /*num_outputs=*/num_outputs); - - for (TaskSignatureTensorKey const &key : all_keys) { - std::vector signatures_for_key = - vector_of(shard_bindings.right_values()); - - std::vector coords_for_key = - transform(signatures_for_key, - [&](RuntimeAtomicTaskShardBinding const &signature) { - return ptensor_space_coord_for_key(signature, key); - }); - - ASSERT(are_all_distinct(coords_for_key)); - - std::vector coord_dims_for_key = - transform(coords_for_key, [](ParallelTensorSpaceCoordinate const &c) { - return ptensor_coord_num_dims(c); - }); - - require_all_same(coord_dims_for_key); - } -} - -bool MappedRuntimeTaskGroup::operator==( - MappedRuntimeTaskGroup const &other) const { - return this->tie() == other.tie(); -} - -bool MappedRuntimeTaskGroup::operator!=( - MappedRuntimeTaskGroup const &other) const { - return this->tie() == other.tie(); -} - -std::tuple< - bidict const &> - MappedRuntimeTaskGroup::tie() const { - - return std::tie(this->shard_bindings); -} - -bidict const & - MappedRuntimeTaskGroup::get_shard_bindings() const { - return this->shard_bindings; -} - -std::string format_as(::FlexFlow::MappedRuntimeTaskGroup const &m) { - return fmt::format("", - m.get_shard_bindings()); -} - -std::ostream &operator<<(std::ostream &s, - ::FlexFlow::MappedRuntimeTaskGroup const &x) { - return (s << fmt::to_string(x)); -} - -MappedRuntimeTaskGroup - lower_mapped_operator_task_group_to_mapped_runtime_task_group( - MappedOperatorTaskGroup const &op_task_group, - SymbolicLayerTrainingTensorGroupSignature const - &symbolic_layer_signature, - FwbOpTaskType task_type) { - return MappedRuntimeTaskGroup{ - transform_values( - op_task_group.get_shard_bindings(), - [&](RuntimeAtomicTaskShardBinding const &op_shard_binding) - -> RuntimeAtomicTaskShardBinding { - return lower_op_shard_binding_to_runtime_shard_binding( - op_shard_binding, symbolic_layer_signature, task_type); - }), - }; -} - -} // namespace FlexFlow - -namespace std { - -size_t hash<::FlexFlow::MappedRuntimeTaskGroup>::operator()( - ::FlexFlow::MappedRuntimeTaskGroup const &x) const { - return ::FlexFlow::get_std_hash(x.tie()); -} - -} // namespace std diff --git a/lib/local-pcg-execution/src/local-pcg-execution/runtime_atomic_task_shard_binding.cc b/lib/local-pcg-execution/src/local-pcg-execution/runtime_atomic_task_shard_binding.cc deleted file mode 100644 index 20924b1eed..0000000000 --- a/lib/local-pcg-execution/src/local-pcg-execution/runtime_atomic_task_shard_binding.cc +++ /dev/null @@ -1,88 +0,0 @@ -#include "local-pcg-execution/runtime_atomic_task_shard_binding.h" -#include "compiler/operator_atomic_task_shard_binding.h" -#include "op-attrs/tensor_role.dtg.h" -#include "task-spec/fwb_tensor_type.dtg.h" -#include "task-spec/symbolic/symbolic_layer_training_tensor_group_signature.h" -#include "utils/containers/map_from_keys_and_values.h" -#include "utils/containers/merge_disjoint_maps.h" -#include "utils/containers/transform.h" - -namespace FlexFlow { - -static std::unordered_map - get_tensor_shard_binding_for_type( - SymbolicLayerTrainingTensorGroupSignature const &signature, - OperatorAtomicTaskShardBinding const &shard_binding, - TensorRole tensor_role, - FwbTensorType tensor_type) { - - std::vector keys = - get_training_tensors_for_role_and_type( - signature, tensor_role, tensor_type); - - std::vector pt_coords = - ptensor_space_coords_for_role(shard_binding, tensor_role); - - return map_from_keys_and_values( - /*keys=*/keys, - /*values=*/pt_coords); -}; - -RuntimeAtomicTaskShardBinding lower_op_shard_binding_to_runtime_shard_binding( - OperatorAtomicTaskShardBinding const &op_shard_binding, - SymbolicLayerTrainingTensorGroupSignature const &signature) { - - auto get_bindings = [&](TensorRole tensor_role, FwbTensorType tensor_type) { - return get_tensor_shard_binding_for_type( - signature, op_shard_binding, tensor_role, tensor_type); - }; - - return RuntimeAtomicTaskShardBinding{ - merge_disjoint_maps(std::vector{ - get_bindings(TensorRole::INPUT, FwbTensorType::FORWARD), - get_bindings(TensorRole::WEIGHT, FwbTensorType::FORWARD), - get_bindings(TensorRole::OUTPUT, FwbTensorType::FORWARD), - }), - }; -} - -RuntimeAtomicTaskShardBinding - lower_op_shard_binding_to_bwd_pass_runtime_shard_binding( - OperatorAtomicTaskShardBinding const &op_shard_binding, - SymbolicLayerTrainingTensorGroupSignature const &signature) { - - auto get_bindings = [&](TensorRole tensor_role, FwbTensorType tensor_type) { - return get_tensor_shard_binding_for_type( - signature, op_shard_binding, tensor_role, tensor_type); - }; - - return RuntimeAtomicTaskShardBinding{ - merge_disjoint_maps(std::vector{ - get_bindings(TensorRole::INPUT, FwbTensorType::FORWARD), - get_bindings(TensorRole::WEIGHT, FwbTensorType::FORWARD), - get_bindings(TensorRole::OUTPUT, FwbTensorType::FORWARD), - get_bindings(TensorRole::INPUT, FwbTensorType::GRADIENT), - get_bindings(TensorRole::WEIGHT, FwbTensorType::GRADIENT), - get_bindings(TensorRole::OUTPUT, FwbTensorType::GRADIENT), - }), - }; -} - -RuntimeAtomicTaskShardBinding lower_op_shard_binding_to_runtime_shard_binding( - OperatorAtomicTaskShardBinding const &shard_binding, - SymbolicLayerTrainingTensorGroupSignature const &signature, - FwbOpTaskType task_type) { - switch (task_type) { - case FwbOpTaskType::FWD: - return lower_op_shard_binding_to_fwd_pass_runtime_shard_binding( - shard_binding, signature); - case FwbOpTaskType::BWD: - return lower_op_shard_binding_to_bwd_pass_runtime_shard_binding( - shard_binding, signature); - default: - PANIC("Unhandled FwbOpTaskType", task_type); - } -} - -} // namespace FlexFlow diff --git a/lib/local-pcg-execution/test/CMakeLists.txt b/lib/local-pcg-execution/test/CMakeLists.txt deleted file mode 100644 index a7427fe351..0000000000 --- a/lib/local-pcg-execution/test/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -ff_add_test_executable( - NAME - local-pcg-execution-tests - SRC_PATTERNS - src/*.cc - PRIVATE_INCLUDE - src/ - DEPS - doctest - utils-test-common - local-pcg-execution - kernels - op-attrs - task-spec -) - diff --git a/lib/local-pcg-execution/test/src/local-pcg-execution/local_pcg_training_backing.cc b/lib/local-pcg-execution/test/src/local-pcg-execution/local_pcg_training_backing.cc deleted file mode 100644 index a8cb61e63b..0000000000 --- a/lib/local-pcg-execution/test/src/local-pcg-execution/local_pcg_training_backing.cc +++ /dev/null @@ -1,10 +0,0 @@ -#include "local-pcg-execution/local_pcg_training_backing.h" -#include - -using namespace ::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("LocalPcgTrainingBacking") { - CHECK_MESSAGE(false, "TODO: LocalPcgTrainingBacking"); - } -} From 890adc5a3ced21c442bfa73bf9503ffba2efb6a7 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Mon, 1 Jun 2026 15:35:21 -0700 Subject: [PATCH 2/2] Remove local-pcg-execution from .proj.toml --- .proj.toml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.proj.toml b/.proj.toml index 3d78c9ae82..37e0f2b74f 100644 --- a/.proj.toml +++ b/.proj.toml @@ -93,13 +93,6 @@ has-cpu-only-benchmarks = false has-cuda-tests = true has-cuda-benchmarks = false -# [targets.local-pcg-execution] -# type = "lib" -# has-cpu-only-tests = true -# has-cpu-only-benchmarks = false -# has-cuda-tests = false -# has-cuda-benchmarks = false - [targets.models] type = "lib" has-cpu-only-tests = true